Example no. 1
0
def check_broker_id_in_zk(broker_id, process):
    """
    Check endlessly for the Zookeeper Connection.

    This function checks endlessly if the broker is still registered in ZK
    (we observed running brokers but missing broker ids, so we implemented this check)
    and if the ZK IPs changed (e.g. due to a node restart). If this happens a Kafka restart is enforced.

    :param broker_id: id this broker is registered under in ZK
                      (checked at /brokers/ids/<broker_id>)
    :param process: subprocess.Popen handle of the running Kafka broker;
                    rebound locally whenever the broker is restarted
    """
    import requests
    from time import sleep
    from random import randint
    from kazoo.client import KazooClient
    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')
    while True:
        if os.getenv('WAIT_FOR_KAFKA') != 'no':
            # Resolve our own private IP via the EC2 instance metadata service.
            ip = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document').json()['privateIp']
            wait_for_kafka_startup.run(ip)
            os.environ['WAIT_FOR_KAFKA'] = 'no'

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            logging.warning("new ZK: %s", new_zk_conn_str)
            logging.warning("old ZK: %s", zk_conn_str)
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)
            # Randomized delays so that not every broker of the cluster
            # kills/restarts at the same moment.
            wait_to_kill = randint(1, 10)
            logging.info("Waiting %s seconds to kill kafka broker ...", wait_to_kill)
            sleep(wait_to_kill)
            process.kill()
            wait_to_restart = randint(10, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)
            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = subprocess.Popen([kafka_dir + "/bin/kafka-server-start.sh",
                                        kafka_dir + "/config/server.properties"])
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/" + broker_id)
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            zk.stop()
        except Exception:
            # Broker id vanished from ZK (or ZK is unreachable): force a broker
            # restart.  `except Exception` instead of a bare `except:` so that
            # KeyboardInterrupt/SystemExit still terminate the watchdog.
            logging.warning("I'm not in ZK registered, killing kafka broker process!")
            zk.stop()
            process.kill()
            logging.info("Restarting kafka broker ...")
            process = subprocess.Popen([kafka_dir + "/bin/kafka-server-start.sh",
                                        kafka_dir + "/config/server.properties"])
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
Example no. 2
0
def check_broker_id_in_zk(broker_id, process):
    """
    Watchdog loop for the broker's ZooKeeper registration.

    Endlessly verifies that the broker is still registered in ZK and that the
    ZK connection string has not changed (e.g. after a ZK node restart); in
    either case the Kafka broker process is killed and started again.

    :param broker_id: id this broker is registered under in ZK
                      (checked at /brokers/ids/<broker_id>)
    :param process: subprocess.Popen handle of the running Kafka broker;
                    rebound locally whenever the broker is restarted
    """
    import requests
    from time import sleep
    from random import randint
    from kazoo.client import KazooClient
    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')
    while True:
        if os.getenv('WAIT_FOR_KAFKA') != 'no':
            # Resolve our own private IP via the EC2 instance metadata service.
            ip = requests.get(
                'http://169.254.169.254/latest/dynamic/instance-identity/document'
            ).json()['privateIp']
            wait_for_kafka_startup.run(ip)
            os.environ['WAIT_FOR_KAFKA'] = 'no'

        new_zk_conn_str = generate_zk_conn_str.run(
            os.getenv('ZOOKEEPER_STACK_NAME'), region)
        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)
            # Randomized delay so that not every broker of the cluster
            # restarts at the same moment.
            wait_to_restart = randint(1, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...",
                         wait_to_restart)
            sleep(wait_to_restart)
            process.kill()
            logging.info(
                "Restarting kafka broker with new ZooKeeper connection string ..."
            )
            process = subprocess.Popen([
                kafka_dir + "/bin/kafka-server-start.sh",
                kafka_dir + "/config/server.properties"
            ])
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/" + broker_id)
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            zk.stop()
        except Exception:
            # Broker id vanished from ZK (or ZK is unreachable): force a broker
            # restart.  `except Exception` instead of a bare `except:` so that
            # KeyboardInterrupt/SystemExit still terminate the watchdog.
            logging.warning(
                "I'm not in ZK registered, killing kafka broker process!")
            zk.stop()
            process.kill()
            logging.info("Restarting kafka broker ...")
            process = subprocess.Popen([
                kafka_dir + "/bin/kafka-server-start.sh",
                kafka_dir + "/config/server.properties"
            ])
            os.environ['WAIT_FOR_KAFKA'] = 'yes'
Example no. 3
0
def check_kafka():
    """
    Best-effort, one-shot Kafka availability check.

    If WAIT_FOR_KAFKA is not 'no', look up our private IP via the EC2
    instance metadata service, wait for Kafka to accept connections on it,
    then mark the wait as done by setting WAIT_FOR_KAFKA='no'.
    Any failure is logged and swallowed on purpose (best-effort check).
    """
    import requests
    try:
        if os.getenv('WAIT_FOR_KAFKA') != 'no':
            logging.info("wait for kafka in broker check")
            ip = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document').json()['privateIp']
            logging.info("wait for kafka in broker check - ip {}".format(ip))
            wait_for_kafka_startup.run(ip)
            logging.info("wait for kafka in broker check - ok")
            os.environ['WAIT_FOR_KAFKA'] = 'no'
    except Exception:
        # `except Exception` (not bare `except:`) so Ctrl-C still works;
        # logging.exception records the traceback for debugging.
        logging.exception("exception on checking kafka")
Example no. 4
0
def run():
    """
    Generate (and optionally write to ZK) a partition reassignment JSON.

    First tries to repair broken topics; if none are found, checks whether any
    broker carries zero weight (i.e. is unused) and, only then, generates a
    full reassignment.  Writing to ZK is skipped when WRITE_TO_JSON='no'.
    """
    replication_factor = 3
    zookeeper_connect_string = os.getenv('ZOOKEEPER_CONN_STRING')
    logging.info("waiting for kafka to start up")
    if os.getenv('WAIT_FOR_KAFKA') != 'no':
        wait_for_kafka_startup.run(get_own_ip())
    else:
        sleep(10)

    logging.info("kafka port is open, continuing")

    zk = KazooClient(hosts=zookeeper_connect_string)
    zk.start()
    zk.add_listener(state_listener)

    logging.info("connected to Zookeeper")

    zk_dict = get_zk_dict(zk)
    result = generate_json(zk_dict, replication_factor, broken_topics=True)
    if result != {}:
        logging.info("JSON generated")
        logging.info("there are %s partitions to repair",
                     len(result['partitions']))
        logging.debug(result)
        if os.getenv('WRITE_TO_JSON') != 'no':
            write_json_to_zk(zk, result)
    else:
        logging.info("no JSON generated")
        # BUG FIX: 'needed' must start as False — with the original True the
        # "no unused Broker found" branch was unreachable and a full
        # reassignment was generated even when every broker carried load.
        needed = False
        for broker in zk_dict['broker']:
            if int(get_broker_weight(zk_dict, {'partitions': []},
                                     broker)) == 0:
                # A zero-weight broker holds no partitions -> rebalance needed.
                needed = True
        if needed is True:
            result = generate_json(zk_dict,
                                   replication_factor,
                                   broken_topics=False)
            if result != {}:
                logging.info("JSON generated")
                if os.getenv('WRITE_TO_JSON') != 'no':
                    write_json_to_zk(zk, result)
        else:
            logging.info("no unused Broker found")

    zk.stop()
    logging.info("exiting")
def check_broker_id_in_zk(broker_id, process):
    """
    Watchdog loop for the broker's ZooKeeper registration.

    Endlessly verifies that the broker is still registered in ZK and that the
    ZK connection string has not changed (e.g. after a ZK node restart); in
    either case the Kafka broker process is killed and started again.

    :param broker_id: id this broker is registered under in ZK
                      (checked at /brokers/ids/<broker_id>)
    :param process: subprocess.Popen handle of the running Kafka broker;
                    rebound locally whenever the broker is restarted
    """
    import requests
    from time import sleep
    from random import randint
    from kazoo.client import KazooClient

    zk_conn_str = os.getenv("ZOOKEEPER_CONN_STRING")
    while True:
        if os.getenv("WAIT_FOR_KAFKA") != "no":
            # Resolve our own private IP via the EC2 instance metadata service.
            ip = requests.get("http://169.254.169.254/latest/dynamic/instance-identity/document").json()["privateIp"]
            wait_for_kafka_startup.run(ip)
            os.environ["WAIT_FOR_KAFKA"] = "no"

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv("ZOOKEEPER_STACK_NAME"), region)
        if zk_conn_str != new_zk_conn_str:
            logging.warning("ZooKeeper connection string changed!")
            zk_conn_str = new_zk_conn_str
            os.environ["ZOOKEEPER_CONN_STRING"] = zk_conn_str
            create_broker_properties(zk_conn_str)
            # Randomized delay so that not every broker of the cluster
            # restarts at the same moment.
            wait_to_restart = randint(1, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)
            process.kill()
            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = subprocess.Popen(
                [kafka_dir + "/bin/kafka-server-start.sh", kafka_dir + "/config/server.properties"]
            )
            os.environ["WAIT_FOR_KAFKA"] = "yes"
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            zk.get("/brokers/ids/" + broker_id)
            logging.info("I'm still in ZK registered, all good!")
            sleep(60)
            zk.stop()
        except Exception:
            # Broker id vanished from ZK (or ZK is unreachable): force a broker
            # restart.  `except Exception` instead of a bare `except:` so that
            # KeyboardInterrupt/SystemExit still terminate the watchdog.
            logging.warning("I'm not in ZK registered, killing kafka broker process!")
            zk.stop()
            process.kill()
            logging.info("Restarting kafka broker ...")
            process = subprocess.Popen(
                [kafka_dir + "/bin/kafka-server-start.sh", kafka_dir + "/config/server.properties"]
            )
            os.environ["WAIT_FOR_KAFKA"] = "yes"
def run():
    """
    Generate (and optionally write to ZK) a partition reassignment JSON.

    First tries to repair broken topics; if none are found, checks whether any
    broker carries zero weight (i.e. is unused) and, only then, generates a
    full reassignment.  Writing to ZK is skipped when WRITE_TO_JSON='no'.
    """
    replication_factor = 3
    zookeeper_connect_string = os.getenv('ZOOKEEPER_CONN_STRING')
    logging.info("waiting for kafka to start up")
    if os.getenv('WAIT_FOR_KAFKA') != 'no':
        wait_for_kafka_startup.run(get_own_ip())
    else:
        sleep(10)

    logging.info("kafka port is open, continuing")

    zk = KazooClient(hosts=zookeeper_connect_string)
    zk.start()
    zk.add_listener(state_listener)

    logging.info("connected to Zookeeper")

    zk_dict = get_zk_dict(zk)
    result = generate_json(zk_dict, replication_factor, broken_topics=True)
    if result != {}:
        logging.info("JSON generated")
        logging.info("there are %s partitions to repair", len(result['partitions']))
        logging.debug(result)
        if os.getenv('WRITE_TO_JSON') != 'no':
            write_json_to_zk(zk, result)
    else:
        logging.info("no JSON generated")
        # BUG FIX: 'needed' must start as False — with the original True the
        # "no unused Broker found" branch was unreachable and a full
        # reassignment was generated even when every broker carried load.
        needed = False
        for broker in zk_dict['broker']:
            if int(get_broker_weight(zk_dict, {'partitions': []}, broker)) == 0:
                # A zero-weight broker holds no partitions -> rebalance needed.
                needed = True
        if needed is True:
            result = generate_json(zk_dict, replication_factor, broken_topics=False)
            if result != {}:
                logging.info("JSON generated")
                if os.getenv('WRITE_TO_JSON') != 'no':
                    write_json_to_zk(zk, result)
        else:
            logging.info("no unused Broker found")

    zk.stop()
    logging.info("exiting")
def run():
    """
    Repair broken topics or, failing that, rebalance onto unused brokers.

    Waits for Kafka to come up (unless WAIT_FOR_KAFKA='no'), reads the cluster
    state from ZooKeeper, and writes a reassignment JSON back to ZK unless
    WRITE_TO_JSON='no'.
    """
    import wait_for_kafka_startup
    logging.info("waiting for kafka to start up")
    if os.getenv('WAIT_FOR_KAFKA') == 'no':
        sleep(10)
    else:
        wait_for_kafka_startup.run(get_own_ip())

    logging.info("kafka port is open, continuing")

    zk = connect_to_zk()
    zk_dict = get_zk_dict(zk)

    logging.info("checking for broken topics")
    broken_topics = check_for_broken_partitions(zk_dict)
    result = generate_json(zk_dict, topics_to_reassign=broken_topics)
    if result != {}:
        logging.info("there are %s partitions to repair", len(result['partitions']))
        logging.debug(result)
        if os.getenv('WRITE_TO_JSON') != 'no':
            logging.info("writing to ZooKeeper ...")
            write_json_to_zk(zk, result)
    else:
        logging.info("no broken topics found, no JSON generated")

        # A broker with weight 0 holds nothing -> rebalance everything.
        broker_weights = get_broker_weights(zk_dict)
        if 0 in broker_weights.values():
            logging.info("there are unused brokers, reassigning all topics ...")
            result = generate_json(zk_dict)
            if result != {}:
                logging.info("JSON generated")
                logging.debug(result)
                if os.getenv('WRITE_TO_JSON') != 'no':
                    write_json_to_zk(zk, result)
        else:
            logging.info("no unused Broker found, no JSON generated")

    zk.stop()
    logging.info("exiting")
def run():
    """
    Entry point: fix broken partitions, else rebalance if brokers sit idle.

    Cluster state comes from ZooKeeper; the resulting reassignment JSON is
    written back to ZK unless WRITE_TO_JSON='no'.  Startup waiting is skipped
    when WAIT_FOR_KAFKA='no'.
    """
    import wait_for_kafka_startup

    logging.info("waiting for kafka to start up")
    wait_needed = os.getenv('WAIT_FOR_KAFKA') != 'no'
    if wait_needed:
        wait_for_kafka_startup.run(get_own_ip())
    else:
        sleep(10)

    logging.info("kafka port is open, continuing")

    zk = connect_to_zk()
    zk_dict = get_zk_dict(zk)

    logging.info("checking for broken topics")
    result = generate_json(
        zk_dict, topics_to_reassign=check_for_broken_partitions(zk_dict))
    write_allowed = os.getenv('WRITE_TO_JSON') != 'no'
    if result != {}:
        logging.info("there are %s partitions to repair", len(result['partitions']))
        logging.debug(result)
        if write_allowed:
            logging.info("writing to ZooKeeper ...")
            write_json_to_zk(zk, result)
    else:
        logging.info("no broken topics found, no JSON generated")

        # Any zero-weight broker means some broker carries no partitions.
        has_idle_broker = any(
            weight == 0 for weight in get_broker_weights(zk_dict).values())
        if has_idle_broker:
            logging.info("there are unused brokers, reassigning all topics ...")
            result = generate_json(zk_dict)
            if result != {}:
                logging.info("JSON generated")
                logging.debug(result)
                if write_allowed:
                    write_json_to_zk(zk, result)
        else:
            logging.info("no unused Broker found, no JSON generated")

    zk.stop()
    logging.info("exiting")