def check_broker_id_in_zk(broker_id_policy, process, region):
    """
    Check endlessly for the ZooKeeper connection.

    This function checks endlessly if the broker is still registered in ZK
    (we observed running brokers but missing broker ids, so we implemented
    this check) and if the ZK connection string changed (e.g. due to a node
    restart). If either happens, a Kafka restart is enforced.

    :param broker_id_policy: policy name handed to
        ``find_out_own_id.get_broker_policy`` to obtain the broker id manager.
    :param process: handle of the running Kafka broker process; it must offer
        ``terminate()`` and ``wait()``. It is replaced by a fresh
        ``subprocess.Popen`` whenever the broker is restarted.
    :param region: region passed through to ``check_kafka`` and
        ``generate_zk_conn_str.run``.

    This function never returns.
    """
    from random import randint

    from kazoo.client import KazooClient

    def start_broker():
        # Single place that knows how the broker is launched.
        return subprocess.Popen([kafka_dir + "/bin/kafka-server-start.sh",
                                 kafka_dir + "/config/server.properties"])

    def stop_broker(proc):
        # Ask the broker to shut down and reap it before a new one is started.
        proc.terminate()
        proc.wait()

    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')
    broker_id_manager = find_out_own_id.get_broker_policy(broker_id_policy)
    broker_id = broker_id_manager.get_id(kafka_data_dir)
    logging.info("check broker id... {}".format(broker_id))

    if not broker_id:
        broker_id = wait_for_broker_id(broker_id_manager, kafka_data_dir)

    while True:
        # NOTE(review): check_kafka is defined elsewhere; presumably it waits
        # for the broker to be up before the checks below run - confirm.
        check_kafka(region)

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            # %-style arguments keep this from raising when the old value is
            # None (environment variable not set).
            logging.warning("new ZK: %s", new_zk_conn_str)
            logging.warning("old ZK: %s", zk_conn_str)
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)

            # Random delays so that not every broker restarts at the same time.
            wait_to_stop = randint(1, 10)
            logging.info("Waiting %s seconds to stop kafka broker ...", wait_to_stop)
            sleep(wait_to_stop)
            stop_broker(process)

            wait_to_restart = randint(10, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)

            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = start_broker()
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/{}".format(broker_id))
        except Exception:
            # Any failure to read the node counts as "not registered".
            # KeyboardInterrupt / SystemExit are deliberately not caught here.
            registered = False
        else:
            registered = True
        finally:
            # A new client is created on every cycle, so release it completely;
            # stop() alone does not free the client's resources.
            zk.stop()
            zk.close()

        if registered:
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            continue

        logging.warning("I'm not in ZK registered, stopping kafka broker process!")
        stop_broker(process)
        logging.info("Restarting kafka broker ...")
        process = start_broker()
        os.environ['WAIT_FOR_KAFKA'] = 'yes'
def check_broker_id_in_zk(broker_id, process):
    """
    Check endlessly for the ZooKeeper connection.

    This function checks endlessly if the broker is still registered in ZK
    (we observed running brokers but missing broker ids, so we implemented
    this check) and if the ZK connection string changed (e.g. due to a node
    restart). If either happens, a Kafka restart is enforced.

    :param broker_id: id of this broker; the node ``/brokers/ids/<broker_id>``
        is looked up in ZooKeeper.
    :param process: handle of the running Kafka broker process; it must offer
        ``kill()`` and ``wait()``. It is replaced by a fresh
        ``subprocess.Popen`` whenever the broker is restarted.

    This function never returns.
    """
    import requests
    from random import randint
    from time import sleep

    from kazoo.client import KazooClient

    def start_broker():
        # Single place that knows how the broker is launched.
        return subprocess.Popen([kafka_dir + "/bin/kafka-server-start.sh",
                                 kafka_dir + "/config/server.properties"])

    def kill_broker(proc):
        # kill() only sends the signal; wait() reaps the child so it does not
        # linger as a zombie and is really gone before a new broker starts.
        proc.kill()
        proc.wait()

    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')

    while True:
        # After a (re)start, block until the broker is reachable again.
        if os.getenv('WAIT_FOR_KAFKA') != 'no':
            ip = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document').json()['privateIp']
            wait_for_kafka_startup.run(ip)
            os.environ['WAIT_FOR_KAFKA'] = 'no'

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            # %-style arguments keep this from raising when the old value is
            # None (environment variable not set).
            logging.warning("new ZK: %s", new_zk_conn_str)
            logging.warning("old ZK: %s", zk_conn_str)
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)

            # Random delays so that not every broker restarts at the same time.
            wait_to_kill = randint(1, 10)
            logging.info("Waiting %s seconds to kill kafka broker ...", wait_to_kill)
            sleep(wait_to_kill)
            kill_broker(process)

            wait_to_restart = randint(10, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)

            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = start_broker()
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/{}".format(broker_id))
        except Exception:
            # Any failure to read the node counts as "not registered".
            # KeyboardInterrupt / SystemExit are deliberately not caught here.
            registered = False
        else:
            registered = True
        finally:
            # A new client is created on every cycle, so release it completely;
            # stop() alone does not free the client's resources.
            zk.stop()
            zk.close()

        if registered:
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            continue

        logging.warning("I'm not in ZK registered, killing kafka broker process!")
        kill_broker(process)
        logging.info("Restarting kafka broker ...")
        process = start_broker()
        os.environ['WAIT_FOR_KAFKA'] = 'yes'
def check_broker_id_in_zk(broker_id, process):
    """
    Watch the broker's ZooKeeper registration forever and restart Kafka on trouble.

    Each cycle regenerates the ZooKeeper connection string and compares it
    with the one currently in use. If it changed, the broker properties are
    rewritten and the broker is restarted after a random delay. Otherwise the
    node ``/brokers/ids/<broker_id>`` is looked up; if that fails the broker
    is restarted immediately, else the loop sleeps for 60 seconds.

    :param broker_id: id of this broker as used in the ZooKeeper node path.
    :param process: handle of the running Kafka broker process; it must offer
        ``kill()`` and ``wait()``. It is replaced by a fresh
        ``subprocess.Popen`` whenever the broker is restarted.

    This function never returns.
    """
    import requests
    from random import randint
    from time import sleep

    from kazoo.client import KazooClient

    broker_cmd = [
        kafka_dir + "/bin/kafka-server-start.sh",
        kafka_dir + "/config/server.properties"
    ]
    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')

    while True:
        # After a (re)start, block until the broker is reachable again.
        if os.getenv('WAIT_FOR_KAFKA') != 'no':
            ip = requests.get(
                'http://169.254.169.254/latest/dynamic/instance-identity/document'
            ).json()['privateIp']
            wait_for_kafka_startup.run(ip)
            os.environ['WAIT_FOR_KAFKA'] = 'no'

        new_zk_conn_str = generate_zk_conn_str.run(
            os.getenv('ZOOKEEPER_STACK_NAME'), region)

        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)

            # Random delay so that not every broker restarts at the same time.
            wait_to_restart = randint(1, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...",
                         wait_to_restart)
            sleep(wait_to_restart)

            # kill() only sends the signal; wait() reaps the child so it is
            # really gone (no zombie) before the replacement is started.
            process.kill()
            process.wait()
            logging.info(
                "Restarting kafka broker with new ZooKeeper connection string ..."
            )
            process = subprocess.Popen(broker_cmd)
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/{}".format(broker_id))
        except Exception:
            # Any failure to read the node counts as "not registered".
            # KeyboardInterrupt / SystemExit are deliberately not caught here.
            registered = False
        else:
            registered = True
        finally:
            # A new client is created on every cycle, so release it completely;
            # stop() alone does not free the client's resources.
            zk.stop()
            zk.close()

        if registered:
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            continue

        logging.warning(
            "I'm not in ZK registered, killing kafka broker process!")
        process.kill()
        process.wait()
        logging.info("Restarting kafka broker ...")
        process = subprocess.Popen(broker_cmd)
        os.environ['WAIT_FOR_KAFKA'] = 'yes'
def check_broker_id_in_zk(broker_id, process):
    """
    Supervise the Kafka broker: restart it when ZooKeeper changes or forgets it.

    Runs an endless loop. Every iteration:

    1. If the ``WAIT_FOR_KAFKA`` environment variable is not ``"no"``, waits
       for the broker to start and then sets it to ``"no"``.
    2. Regenerates the ZooKeeper connection string. If it differs from the
       one in use, rewrites the broker properties, waits a random 1-20
       seconds, then kills and restarts the broker.
    3. Otherwise reads ``/brokers/ids/<broker_id>`` from ZooKeeper. On
       success sleeps 60 seconds; on failure kills and restarts the broker.

    :param broker_id: id of this broker as used in the ZooKeeper node path.
    :param process: handle of the running Kafka broker process; it must offer
        ``kill()`` and ``wait()``. It is replaced by a fresh
        ``subprocess.Popen`` whenever the broker is restarted.

    This function never returns.
    """
    import requests
    from random import randint
    from time import sleep

    from kazoo.client import KazooClient

    def replace_broker(old_process):
        # kill() only sends the signal; wait() reaps the child so it is really
        # gone (no zombie) before the replacement broker is launched.
        old_process.kill()
        old_process.wait()

    def launch_broker():
        return subprocess.Popen(
            [kafka_dir + "/bin/kafka-server-start.sh", kafka_dir + "/config/server.properties"]
        )

    zk_conn_str = os.getenv("ZOOKEEPER_CONN_STRING")

    while True:
        if os.getenv("WAIT_FOR_KAFKA") != "no":
            ip = requests.get("http://169.254.169.254/latest/dynamic/instance-identity/document").json()["privateIp"]
            wait_for_kafka_startup.run(ip)
            os.environ["WAIT_FOR_KAFKA"] = "no"

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv("ZOOKEEPER_STACK_NAME"), region)

        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            zk_conn_str = new_zk_conn_str
            os.environ["ZOOKEEPER_CONN_STRING"] = zk_conn_str
            create_broker_properties(zk_conn_str)

            # Random delay so that not every broker restarts at the same time.
            wait_to_restart = randint(1, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)

            replace_broker(process)
            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = launch_broker()
            os.environ["WAIT_FOR_KAFKA"] = "yes"
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/{}".format(broker_id))
        except Exception:
            # Any failure to read the node counts as "not registered".
            # KeyboardInterrupt / SystemExit are deliberately not caught here.
            registered = False
        else:
            registered = True
        finally:
            # A new client is created on every cycle, so release it completely;
            # stop() alone does not free the client's resources.
            zk.stop()
            zk.close()

        if registered:
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            continue

        logging.warning("I'm not in ZK registered, killing kafka broker process!")
        replace_broker(process)
        logging.info("Restarting kafka broker ...")
        process = launch_broker()
        os.environ["WAIT_FOR_KAFKA"] = "yes"
import generate_zk_conn_str

# Installation directory of Kafka, taken from the environment.
kafka_dir = os.getenv('KAFKA_DIR')

logging.basicConfig(level=getattr(logging, 'INFO', None))

# Probe the EC2 instance metadata endpoint to find out the region; when it
# cannot be reached we fall back to local mode (region = None).
try:
    logging.info("Checking if we are on AWS or not ...")
    response = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document', timeout=5)
    json = response.json()
    region = json['region']
except requests.exceptions.ConnectionError:
    logging.info("Seems like this is a local environment, we will run now in local mode")
    region = None

zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str

logging.info("Got ZooKeeper connection string: " + zk_conn_str)


def get_remote_config(file, url):
    """
    Download ``url`` and store the response text in ``file``.

    :param file: path of the local file to (over)write.
    :param url: URL the content is fetched from.
    """
    logging.info("getting " + file + " file from " + url)
    # Fetch first so that a failed download does not leave behind an already
    # truncated, empty file.
    content = requests.get(url).text
    # The context manager guarantees the file is flushed and closed.
    with open(file, 'w') as file_:
        file_.write(content)


def create_broker_properties(zk_conn_str):
    # NOTE(review): only the beginning of this function is visible here; it is
    # reproduced unchanged.
    with open(kafka_dir + '/config/server.properties', "r+") as f:
        lines = f.read().splitlines()
from multiprocessing import Pool
import wait_for_kafka_startup
import generate_zk_conn_str

# Installation directory of Kafka, taken from the environment.
kafka_dir = os.getenv('KAFKA_DIR')

logging.basicConfig(level=getattr(logging, 'INFO', None))

# Probe the EC2 instance metadata endpoint to find out the region; when it
# cannot be reached we fall back to region = None.
try:
    # The timeout keeps start-up from hanging when the link-local metadata
    # address is not routable (i.e. when not running on EC2).
    response = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document', timeout=5)
    json = response.json()
    region = json['region']
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    region = None

zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str

logging.info("Got ZooKeeper connection string: " + zk_conn_str)


def create_broker_properties(zk_conn_str):
    """
    Rewrite ``<kafka_dir>/config/server.properties`` with a new ZooKeeper address.

    The file is rewritten in place: a ``zookeeper.connect=<zk_conn_str>`` line
    is written first, followed by every original line except existing
    ``zookeeper.connect`` entries.

    :param zk_conn_str: ZooKeeper connection string to put into the file.
    """
    key = 'zookeeper.connect'

    def sets_zk_connect(line):
        # Match the exact property key only. A plain prefix test would also
        # drop unrelated keys such as "zookeeper.connection.timeout.ms".
        stripped = line.lstrip()
        if not stripped.startswith(key):
            return False
        # The key must be followed by a separator (or nothing at all).
        return stripped[len(key):len(key) + 1] in ('', '=', ':', ' ', '\t')

    with open(kafka_dir + '/config/server.properties', "r+") as f:
        lines = f.read().splitlines()

        # Start over at the top of the same file handle.
        f.seek(0)
        f.truncate()

        f.write(key + '=' + zk_conn_str + '\n')
        for line in lines:
            if not sets_zk_connect(line):
                f.write(line + '\n')