Ejemplo n.º 1
0
def check_cluster_health():
    """Publish the cluster health and peer count as the unit's status.

    The unit is reported as blocked when the health output mentions
    "unhealthy" or when the member list cannot be read; otherwise it is
    reported as active.
    """
    ctl = EtcdCtl()
    report = ctl.cluster_health()

    # Any status text that does not mention "unhealthy" counts as healthy.
    unit_health = "UnHealthy" if "unhealthy" in report["status"] else "Healthy"

    # Count the known peers; a failed lookup surfaces as 0 peers and "Errored".
    try:
        peer_count = len(ctl.member_list())
    except Exception:
        unit_health, peer_count = "Errored", 0

    plural = "" if peer_count == 1 else "s"
    message = "{0} with {1} known peer{2}".format(unit_health, peer_count, plural)

    if unit_health not in ("UnHealthy", "Errored"):
        status.active(message)
        return
    status.blocked(message)
Ejemplo n.º 2
0
def initialize_new_leader():
    ''' Bring up a single member etcd cluster on this unit and publish the
    leadership data so the followers can join it.

    The defaults file is rendered, etcd is restarted and the cluster health is
    checked. If the cluster reports itself unhealthy the unit is set to
    'blocked' and nothing is published. Otherwise the client port is opened,
    the token, leader address and cluster string are sent through leader_set
    and the 'etcd.leader.configured' state is set. '''
    bag = EtcdDatabag()
    # NOTE(review): this self-assignment looks like a no-op; presumably it
    # pins the token as an instance attribute so it shows up in bag.__dict__
    # when the template is rendered -- confirm against EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    cluster_connection_string = get_connection_string([bag.private_address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)
    render('defaults', '/etc/default/etcd', bag.__dict__, owner='root',
           group='root')
    host.service_restart('etcd')

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good. The verdict lives in the
    # 'status' entry of the health report; testing membership on the report
    # itself would only look at its keys and never notice a failure.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return
    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([bag.private_address],
                                                     bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
Ejemplo n.º 3
0
def initialize_new_leader():
    ''' Bring up a single member etcd cluster on this unit and publish the
    leadership data so the followers can join it.

    The configuration is rendered, the etcd daemon is restarted and the
    cluster health is checked. If the cluster reports itself unhealthy the
    unit is set to 'blocked' and nothing is published. Otherwise the client
    port is opened, the token, leader address and cluster string are sent
    through leader_set and the 'etcd.leader.configured' state is set. '''
    bag = EtcdDatabag()
    # NOTE(review): this self-assignment looks like a no-op; presumably it
    # pins the token as an instance attribute of the databag -- confirm
    # against EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    address = get_ingress_address('cluster')
    cluster_connection_string = get_connection_string([address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good. The verdict lives in the
    # 'status' entry of the health report; testing membership on the report
    # itself would only look at its keys and never notice a failure.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return
    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address],
                                                     bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
Ejemplo n.º 4
0
def initialize_new_leader():
    """Bring up a single member etcd cluster on this unit and publish the
    leadership data so the followers can join it.

    The configuration is rendered, the etcd daemon is restarted and the
    cluster health is checked. If the cluster reports itself unhealthy the
    unit is marked blocked and nothing is published. Otherwise the client port
    is opened, the leader address and cluster string are sent through
    leader_set, and both the "etcd.registered" and "etcd.leader.configured"
    states are set.
    """
    bag = EtcdDatabag()
    # NOTE(review): this self-assignment looks like a no-op; presumably it
    # pins the token as an instance attribute of the databag -- confirm
    # against EtcdDatabag.
    bag.token = bag.token
    bag.set_cluster_state("new")
    address = get_ingress_address("cluster")
    cluster_connection_string = get_connection_string([address], bag.management_port)
    bag.set_cluster("{}={}".format(bag.unit_name, cluster_connection_string))

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good. The result is kept in
    # `health` rather than `status` so it does not shadow the `status` helper
    # used to report the blocked state, and the verdict is read from its
    # "status" entry instead of testing membership on the report itself.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if "unhealthy" in health["status"]:
        status.blocked("Cluster not healthy.")
        return
    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({"leader_address": leader_connection_string, "cluster": bag.cluster})

    # set registered state since if we ever become a follower, we will not need
    # to re-register
    set_state("etcd.registered")

    # finish bootstrap delta and set configured state
    set_state("etcd.leader.configured")
Ejemplo n.º 5
0
def register_node_with_leader(cluster):
    '''
    Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of offline
    static turnup rules. If we find a GUID in the member list without peering
    information the unit will enter a race condition and must wait for a clean
    status output before we can progress to self registration.

    The `cluster` argument is not used by this function. On success the client
    port is opened and the 'etcd.registered' state is set; when a member is
    still mid-registration the unit is set to 'waiting', and when the cluster
    reports itself unhealthy the unit is set to 'blocked'.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render('defaults', '/etc/default/etcd', bag.__dict__)
        host.service_restart('etcd')
        time.sleep(2)

    peers = etcdctl.member_list(leader_get('leader_address'))
    for unit in peers:
        if 'client_urls' not in peers[unit]:
            # we cannot register. State not attainable.
            msg = 'Waiting for unit to complete registration'
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            # Cache the registration details so a later retry can reuse them.
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])

            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']

    render('defaults', '/etc/default/etcd', bag.__dict__)
    host.service_restart('etcd')
    time.sleep(2)

    # Check health status before we say we are good. The verdict lives in the
    # 'status' entry of the health report; testing membership on the report
    # itself would only look at its keys and never notice a failure.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return
    open_port(bag.port)
    set_state('etcd.registered')
Ejemplo n.º 6
0
def check_cluster_health():
    ''' Publish the cluster health and the number of known peers as the
    unit's status message. The workload state is always set to 'active'. '''
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()

    # Determine if the unit is healthy or unhealthy. "unhealthy" contains the
    # substring "healthy", so it has to be ruled out explicitly; otherwise an
    # unhealthy cluster would be reported as healthy.
    cluster_status = health['status']
    if 'healthy' in cluster_status and 'unhealthy' not in cluster_status:
        unit_health = "Healthy"
    else:
        unit_health = "Unhealthy"

    # Determine units peer count, and surface 0 by default
    try:
        peers = len(etcdctl.member_list())
    except Exception:
        peers = 0

    status_message = "{0} with {1} known peers.".format(unit_health, peers)
    status_set('active', status_message)
Ejemplo n.º 7
0
def check_cluster_health():
    ''' Publish the cluster health and peer count as the unit's status
    message. The workload state is set to 'active' in every case. '''
    ctl = EtcdCtl()
    report = ctl.cluster_health()

    # Any status text that does not mention 'unhealthy' counts as healthy.
    unit_health = "UnHealthy" if 'unhealthy' in report['status'] else "Healthy"

    # Count the known peers; a failed lookup surfaces as 0 peers and "Errored".
    try:
        peer_count = len(ctl.member_list())
    except Exception:
        unit_health, peer_count = "Errored", 0

    plural = '' if peer_count == 1 else 's'
    message = "{0} with {1} known peer{2}".format(unit_health, peer_count,
                                                  plural)

    status_set('active', message)
Ejemplo n.º 8
0
def register_node_with_leader(cluster):
    '''
    Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of offline
    static turnup rules. If we find a GUID in the member list without peering
    information the unit will enter a race condition and must wait for a clean
    status output before we can progress to self registration.

    The `cluster` argument is not used by this function. On success the client
    port is opened and the 'etcd.registered' state is set. The function returns
    early, leaving that state unset, when the member list cannot be read, when
    a member is still mid-registration, when registration fails, or when the
    cluster reports itself unhealthy.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render_config(bag)
        time.sleep(2)

    try:
        peers = etcdctl.member_list(leader_get('leader_address'))
    except CalledProcessError:
        log("Etcd attempted to invoke registration before service ready")
        # This error state is transient, and does not imply the unit is broken.
        # Erroring at this stage can be resolved, and should not effect the
        # overall condition of unit turn-up. Return from the method and let the
        # charm re-invoke on next run
        return

    for unit in peers:
        if 'client_urls' not in peers[unit]:
            msg = 'Waiting for unit to complete registration.'
            if ('peer_urls' in peers[unit] and
                    peers[unit]['peer_urls'] and
                    get_ingress_address('cluster') in peers[unit]['peer_urls'] and  # noqa
                    not host.service_running(bag.etcd_daemon)):
                # We have a peer that is unstarted and it is this node.
                # We do not run etcd now. Instead of blocking everyone
                # try to self-unregister.
                try:
                    leader_address = leader_get('leader_address')
                    msg = 'Etcd service did not start. Will retry soon.'
                    etcdctl.unregister(peers[unit]['unit_id'], leader_address)
                except CalledProcessError:
                    log('Notice:  Unit failed to unregister', 'WARNING')
            # we cannot register. State not attainable.
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            # Cache the registration details so a later retry can reuse them.
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])

            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']
        else:
            log('etcdctl.register failed, will retry')
            msg = 'Waiting to retry etcd registration'
            status_set('waiting', msg)
            return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    time.sleep(2)

    # Check health status before we say we are good. The verdict lives in the
    # 'status' entry of the health report; testing membership on the report
    # itself would only look at its keys and never notice a failure.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return
    open_port(bag.port)
    set_state('etcd.registered')
Ejemplo n.º 9
0
def check_cluster_health():
    ''' Set the unit's status message to the status text reported by the
    cluster health check, with the workload state 'active'. '''
    report = EtcdCtl().cluster_health()
    status_set('active', report['status'])