Ejemplo n.º 1
0
def wait(master_count_filename):
    """Check that the local ZooKeeper runs in the mode expected for this cluster.

    If ``master_count_filename`` does not exist the node is treated as an agent
    and nothing is checked. Otherwise the expected cluster size is read from
    that file and the mode reported by ``get_zookeeper_mode()`` is validated:
    standalone for a cluster of size 1, follower or leader for anything else.
    The process exits with status 1 when ZooKeeper refuses the connection or
    reports any other mode.

    Afterwards the Exhibitor status endpoint is queried for logging purposes
    only. Every failure in that step (request error, non-200 status, non-JSON
    body, or JSON without the expected per-node fields) is logged and the
    function returns normally.

    Args:
        master_count_filename: Object exposing ``exists()`` that is also
            accepted by ``utils.read_file_text``; its contents must parse as
            an integer.
    """
    if not master_count_filename.exists():
        # this is an agent
        log.info("master_count file doesn't exist, not waiting")
        return

    cluster_size = int(utils.read_file_text(master_count_filename))
    log.info('Expected cluster size: %s', cluster_size)

    try:
        zk_mode = get_zookeeper_mode()
    except ConnectionRefusedError:
        log.error('ZooKeeper not running')
        sys.exit(1)

    if cluster_size == 1:
        desired_modes = {ZK_MODE_STANDALONE}
    else:
        desired_modes = {ZK_MODE_FOLLOWER, ZK_MODE_LEADER}
    if zk_mode not in desired_modes:
        log.error('ZooKeeper not in correct mode: %s', zk_mode)
        sys.exit(1)

    log.info('ZooKeeper OK: %s', zk_mode)

    # Check Exhibitor, but do not fail if it shows unexpected results
    try:
        response = requests.get(EXHIBITOR_STATUS_URL)
    except requests.exceptions.RequestException as ex:
        # RequestException is the base class of ConnectionError; catching it
        # keeps this best-effort check from crashing on any request failure.
        log.error('Could not connect to exhibitor: %s', ex)
        return
    if response.status_code != 200:
        log.error('Could not get exhibitor status: %s, Status code: %s',
                  EXHIBITOR_STATUS_URL, response.status_code)
        return

    try:
        data = response.json()
    except ValueError:
        log.error('Non-JSON returned by Exhibitor: %r', response.content)
        return

    # Valid JSON can still have an unexpected shape (not a list of objects,
    # missing keys, non-string hostnames); report it instead of raising.
    try:
        leaders = [node['hostname'] for node in data if node['isLeader']]
        serving = [node['hostname'] for node in data
                   if node['description'] == 'serving']
        serving_text = ','.join(serving)
        leaders_text = ','.join(leaders)
    except (KeyError, TypeError):
        log.error('Unexpected status returned by Exhibitor: %r', data)
        return

    log.info('ZK servers: %r leaders: %r', serving_text, leaders_text)
Ejemplo n.º 2
0
def wait(master_count_filename):
    """Verify via Exhibitor that the ZooKeeper cluster has reached its expected shape.

    If ``master_count_filename`` does not exist the node is treated as an agent
    and the function returns at once. It also returns at once when
    ``try_shortcut()`` reports success. Otherwise the expected cluster size is
    read from the file and the Exhibitor status endpoint is queried; the
    process exits with status 1 when

    * the request fails or the response status is not 200,
    * the response body is not JSON or lacks the expected per-node fields, or
    * the number of serving nodes differs from the expected cluster size or
      the number of leaders is not exactly one.

    On success, the value returned by ``get_zk_pid_mtime()`` (when not
    ``None``) is written to ``stash_zk_pid_stat_mtime_path``.

    Args:
        master_count_filename: Object exposing ``exists()`` that is also
            accepted by ``utils.read_file_text``; its contents must parse as
            an integer.
    """
    if not master_count_filename.exists():
        # this is an agent
        log.info("master_count file doesn't exist, not waiting")
        return

    if try_shortcut():
        log.info("Shortcut succeeeded, assuming local zk is in good config state, not waiting for quorum.")
        return
    log.info('Shortcut failed, waiting for exhibitor to bring up zookeeper and stabilize')

    cluster_size = int(utils.read_file_text(master_count_filename))
    log.info('Expected cluster size: %s', cluster_size)

    log.info('Waiting for ZooKeeper cluster to stabilize')

    try:
        response = requests.get(EXHIBITOR_STATUS_URL)
    except requests.exceptions.RequestException as ex:
        # RequestException is the base class of ConnectionError; any request
        # failure is reported the same way instead of escaping as a traceback.
        log.error('Could not connect to exhibitor: %s', ex)
        sys.exit(1)

    if response.status_code != 200:
        log.error('Could not get exhibitor status: %s, Status code: %s',
                  EXHIBITOR_STATUS_URL, response.status_code)
        sys.exit(1)

    try:
        data = response.json()
    except ValueError:
        log.error('Non-JSON returned by Exhibitor: %r', response.content)
        sys.exit(1)

    # Valid JSON can still have an unexpected shape (not a list of objects,
    # missing keys, non-string hostnames); fail with a log line, not a traceback.
    try:
        leaders = [node['hostname'] for node in data if node['isLeader']]
        serving = [node['hostname'] for node in data
                   if node['description'] == 'serving']
        serving_text = ','.join(serving)
        leaders_text = ','.join(leaders)
    except (KeyError, TypeError):
        log.error('Unexpected status returned by Exhibitor: %r', data)
        sys.exit(1)

    log.info(
        "Serving hosts: `%s`, leader: `%s`", serving_text, leaders_text)

    if len(serving) != cluster_size or len(leaders) != 1:
        log.error(
            'Expected %s servers and 1 leader, got %s servers and %s leaders',
            cluster_size, len(serving), len(leaders))
        sys.exit(1)

    # Local Zookeeper is up. Config should be stable, local zookeeper happy. Stash the PID so if
    # there is a restart we can come up quickly without requiring a new zookeeper quorum.
    zk_pid_mtime = get_zk_pid_mtime()
    if zk_pid_mtime is not None:
        log.info('Stashing zk.pid mtime %s to %s', zk_pid_mtime, stash_zk_pid_stat_mtime_path)
        utils.write_private_file(stash_zk_pid_stat_mtime_path, str(zk_pid_mtime).encode('utf8'))
Ejemplo n.º 3
0
def get_zk_pid():
    """Return the contents of the file at ``zk_pid_path``.

    The file is read through ``utils.read_file_text`` and whatever that helper
    returns is handed back to the caller unchanged.
    """
    pid_text = utils.read_file_text(zk_pid_path)
    return pid_text