Skip to content

meanother/HeadHunter-ETL

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

16 Commits
 
 
 
 
 
 
 
 

Repository files navigation

HeadHunter kafka ETL

create kafka inst

adduser apache
adduser apache sudo

wget https://downloads.apache.org/kafka/2.5.0/kafka_2.12-2.5.0.tgz
wget https://downloads.apache.org/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz
tar -zxf

Zookeeper.service

vim zoo.cfg

tickTime=2000
dataDir=/home/apache/template
clientPort=2181
host.name=35.230.42.114
maxClientCnxns=60

systemd zookeeper.service

[Unit]
Requires=network.target
After=network.target

[Service]
Type=forking
User=apache
ExecStart=/home/apache/zookeeper/bin/zkServer.sh start /home/apache/zookeeper/conf/zoo.cfg
ExecStop=/home/apache/zookeeper/bin/zkServer.sh stop /home/apache/zookeeper/conf/zoo.cfg
ExecReload=/home/apache/zookeeper/bin/zkServer.sh restart /home/apache/zookeeper/conf/zoo.cfg
Restart=on-abnormal
TimeoutSec=30
Restart=on-failure

[Install]
WantedBy=multi-user.target

kafka.service

server.properties

broker.id=0
listeners=PLAINTEXT://:9092
advertised.listeners=PLAINTEXT://35.230.42.114:9092
num.network.threads=3
zookeeper.connect=35.230.42.114:2181
message.max.bytes=1994857600
max.message.bytes=1994857600
#message.max.bytes=1702390132

systemd kafka.service

[Unit]
Requires=zookeeper.service
After=zookeeper.service

[Service]
Type=simple
User=apache
ExecStart=/bin/sh -c '/home/apache/kafka/bin/kafka-server-start.sh /home/apache/kafka/config/server.properties > /home/apache/kafka.log 2>&1'
ExecStop=/home/apache/kafka/bin/kafka-server-stop.sh
Restart=on-abnormal

[Install]
WantedBy=multi-user.target

Create topic

kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 2 --topic HeadHunterETL

sudo systemctl daemon-reload

make install python 3.7

wget https://www.python.org/ftp/python/3.7.3/Python-3.7.3.tgz ; \
tar xvf Python-3.7.* ; \
cd Python-3.7.3 ; \
mkdir ~/.python ; \
./configure --enable-optimizations --prefix=/home/www/.python ; \
make -j8 ; \
sudo make altinstall

create spark stand-alone app

wget https://archive.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
sudo tar xvf spark-2.4.5-bin-hadoop2.7.tgz -C /home/apache/spark
sudo ./home/apache/spark/sbin/start-master.sh

sudo ./home/apache/spark/sbin/start-slave.sh spark://serverpromise:7077
or
sudo ./home/apache/spark/sbin/start-slave.sh --cores 4 --memory 4g --port 22445 spark://35.230.42.114:7077

About

kafka ETL api.hh

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages