def check_cluster_health():
    """Publish a unit status summarising etcd health and known peer count.

    The original docstring states this runs every 5 minutes; the scheduling
    itself is not visible in this block.

    The status line reads ``"<verdict> with <n> known peer(s)"`` where the
    verdict is ``UnHealthy`` when the cluster-health status text contains
    "unhealthy", ``Errored`` when the member list cannot be read, and
    ``Healthy`` otherwise. Only ``Healthy`` is reported as active; the other
    two verdicts are reported as blocked.
    """
    ctl = EtcdCtl()
    report = ctl.cluster_health()

    verdict = "UnHealthy" if "unhealthy" in report["status"] else "Healthy"

    # Peer count is best effort: any failure while listing members is
    # surfaced as an "Errored" verdict with zero peers rather than raised.
    try:
        peer_count = len(ctl.member_list())
    except Exception:
        verdict = "Errored"
        peer_count = 0

    plural = "" if peer_count == 1 else "s"
    message = "{0} with {1} known peer{2}".format(verdict, peer_count, plural)

    # Pick the reporting call once, then emit the message through it.
    publish = status.active if verdict == "Healthy" else status.blocked
    publish(message)
def initialize_new_leader():
    ''' Create an initial cluster string to bring up a single member cluster of
    etcd, and set the leadership data so the followers can join this one.

    Steps, in order: render /etc/default/etcd for a 'new' single-member
    cluster, restart the etcd service, verify cluster health, then open the
    client port, publish token / leader_address / cluster through leader
    data and set the 'etcd.leader.configured' state.

    If the health check reports an unhealthy cluster the unit is set to
    'blocked' and the function returns before anything is published.
    '''
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept from the original; presumably it
    # pins the token onto the instance so it is present in bag.__dict__ when
    # the template is rendered -- confirm against EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    cluster_connection_string = get_connection_string([bag.private_address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)
    render('defaults', '/etc/default/etcd', bag.__dict__, owner='root',
           group='root')
    host.service_restart('etcd')

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    # cluster_health() is indexed with ['status'] by check_cluster_health in
    # this file, so inspect that text. Testing `in` against the returned
    # mapping itself only looks at its keys and never matches.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([bag.private_address],
                                                     bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
def initialize_new_leader():
    ''' Create an initial cluster string to bring up a single member cluster of
    etcd, and set the leadership data so the followers can join this one.

    Steps, in order: render the etcd configuration for a 'new' single-member
    cluster addressed by this unit's 'cluster' ingress address, restart the
    etcd daemon, verify cluster health, then open the client port, publish
    token / leader_address / cluster through leader data and set the
    'etcd.leader.configured' state.

    If the health check reports an unhealthy cluster the unit is set to
    'blocked' and the function returns before anything is published.
    '''
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept from the original; presumably it
    # pins the token onto the instance before rendering -- confirm against
    # EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    address = get_ingress_address('cluster')
    cluster_connection_string = get_connection_string([address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    # cluster_health() is indexed with ['status'] by check_cluster_health in
    # this file, so inspect that text. Testing `in` against the returned
    # mapping itself only looks at its keys and never matches.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
def initialize_new_leader():
    """Create an initial cluster string to bring up a single member cluster of
    etcd, and set the leadership data so the followers can join this one.

    Steps, in order: render the etcd configuration for a "new" single-member
    cluster addressed by this unit's "cluster" ingress address, restart the
    etcd daemon, verify cluster health, then open the client port, publish
    leader_address / cluster through leader data and set both the
    "etcd.registered" and "etcd.leader.configured" states.

    If the health check reports an unhealthy cluster the unit is reported as
    blocked and the function returns before anything is published.
    """
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept from the original; presumably it
    # pins the token onto the instance before rendering -- confirm against
    # EtcdDatabag.
    bag.token = bag.token
    bag.set_cluster_state("new")
    address = get_ingress_address("cluster")
    cluster_connection_string = get_connection_string([address], bag.management_port)
    bag.set_cluster("{}={}".format(bag.unit_name, cluster_connection_string))

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    # The result is deliberately NOT bound to a local called `status`: that
    # name would shadow the `status` helper for the whole function, and
    # `status.blocked(...)` would then be looked up on the health result.
    # The membership test also targets the ["status"] text, as
    # check_cluster_health in this file does. Testing `in` against the
    # returned mapping only looks at its keys and never matches.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if "unhealthy" in health["status"]:
        status.blocked("Cluster not healthy.")
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({"leader_address": leader_connection_string, "cluster": bag.cluster})

    # set registered state since if we ever become a follower, we will not need
    # to re-register
    set_state("etcd.registered")

    # finish bootstrap delta and set configured state
    set_state("etcd.leader.configured")
def register_node_with_leader(cluster):
    ''' Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of offline
    static turnup rules. If we find a GUID in the member list without peering
    information the unit will enter a race condition and must wait for a clean
    status output before we can progress to self registration.

    Outcomes, all signalled through unit status / reactive state (the
    function always returns None):

    * a member without 'client_urls' is listed -> status 'waiting', return;
    * registration with the leader did not yield both 'cluster_unit_id' and
      'cluster' -> status 'waiting', return (retried on a later invocation);
    * etcd restarted but the cluster reports unhealthy -> status 'blocked';
    * otherwise the client port is opened and 'etcd.registered' is set.

    :param cluster: not used by this function body; presumably supplied by
        the reactive framework -- confirm against the decorator/caller.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render('defaults', '/etc/default/etcd', bag.__dict__)
        host.service_restart('etcd')
        time.sleep(2)

    peers = etcdctl.member_list(leader_get('leader_address'))
    for unit in peers:
        if 'client_urls' not in peers[unit]:
            # we cannot register. State not attainable.
            msg = 'Waiting for unit to complete registration'
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if not (resp and 'cluster_unit_id' in resp and 'cluster' in resp):
            # Registration failed or returned an incomplete answer. Do not
            # fall through and restart etcd with a configuration that lacks
            # the cluster string; wait and let a later invocation retry.
            status_set('waiting', 'Waiting to retry etcd registration')
            return

        bag.cache_registration_detail('cluster_unit_id',
                                      resp['cluster_unit_id'])
        bag.cache_registration_detail('registration_peer_string',
                                      resp['cluster'])

        bag.cluster_unit_id = resp['cluster_unit_id']
        bag.cluster = resp['cluster']

    render('defaults', '/etc/default/etcd', bag.__dict__)
    host.service_restart('etcd')
    # Give the freshly restarted service a moment before probing it.
    time.sleep(2)

    # Check health status before we say we are good.
    # cluster_health() is indexed with ['status'] by check_cluster_health in
    # this file, so inspect that text. Testing `in` against the returned
    # mapping itself only looks at its keys and never matches.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return

    open_port(bag.port)
    set_state('etcd.registered')
def check_cluster_health():
    ''' Report the cluster health and known peer count as the unit status.

    The original docstring states this runs every 5 minutes; the scheduling
    itself is not visible in this block.

    The unit status is always set to 'active'; the message carries the
    verdict ("Healthy" / "Unhealthy") and the number of known peers, which
    falls back to 0 when the member list cannot be read.
    '''
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    summary = health['status']

    # Determine if the unit is healthy or unhealthy.
    # "healthy" is a substring of "unhealthy", so testing for "healthy"
    # alone reports an unhealthy cluster as Healthy. Rule out the negative
    # wording explicitly.
    if 'healthy' in summary and 'unhealthy' not in summary:
        unit_health = "Healthy"
    else:
        unit_health = "Unhealthy"

    # Determine units peer count, and surface 0 by default
    try:
        peers = len(etcdctl.member_list())
    except Exception:
        # Best effort: a failing member listing must not break the report.
        peers = 0

    status_message = "{0} with {1} known peers.".format(unit_health, peers)
    status_set('active', status_message)
def check_cluster_health():
    ''' Report the cluster health and known peer count as the unit status.

    The original docstring states this runs every 5 minutes; the scheduling
    itself is not visible in this block.

    The message reads "<verdict> with <n> known peer(s)". The verdict is
    "UnHealthy" when the cluster-health status text contains "unhealthy",
    "Errored" when the member list cannot be read, otherwise "Healthy".
    The unit status is set to 'active' in every case.
    '''
    ctl = EtcdCtl()
    report = ctl.cluster_health()

    verdict = "UnHealthy" if 'unhealthy' in report['status'] else "Healthy"

    # Peer count is best effort: any failure while listing members turns the
    # verdict into "Errored" and the count into 0 instead of raising.
    try:
        peer_count = len(ctl.member_list())
    except Exception:
        verdict = "Errored"
        peer_count = 0

    plural = '' if peer_count == 1 else 's'
    message = "{0} with {1} known peer{2}".format(verdict, peer_count, plural)
    status_set('active', message)
def register_node_with_leader(cluster):
    ''' Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of offline
    static turnup rules. If we find a GUID in the member list without peering
    information the unit will enter a race condition and must wait for a clean
    status output before we can progress to self registration.

    Outcomes, all signalled through unit status / reactive state (the
    function always returns None):

    * the member list cannot be fetched from the leader -> logged, return;
    * a member without 'client_urls' is listed -> status 'waiting', return.
      When that member carries this unit's ingress address and the local
      etcd daemon is not running, an unregister of that member is attempted
      first;
    * registration with the leader did not yield both 'cluster_unit_id' and
      'cluster' -> status 'waiting', return;
    * etcd restarted but the cluster reports unhealthy -> status 'blocked';
    * otherwise the client port is opened and 'etcd.registered' is set.

    :param cluster: not used by this function body; presumably supplied by
        the reactive framework -- confirm against the decorator/caller.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render_config(bag)
        time.sleep(2)

    try:
        peers = etcdctl.member_list(leader_get('leader_address'))
    except CalledProcessError:
        log("Etcd attempted to invoke registration before service ready")
        # This error state is transient, and does not imply the unit is broken.
        # Erroring at this stage can be resolved, and should not effect the
        # overall condition of unit turn-up. Return from the method and let the
        # charm re-invoke on next run
        return

    for unit in peers:
        member = peers[unit]
        if 'client_urls' in member:
            continue

        msg = 'Waiting for unit to complete registration.'
        if ('peer_urls' in member and
                member['peer_urls'] and
                get_ingress_address('cluster') in member['peer_urls'] and
                not host.service_running(bag.etcd_daemon)):
            # We have a peer that is unstarted and it is this node.
            # We do not run etcd now. Instead of blocking everyone
            # try to self-unregister.
            try:
                leader_address = leader_get('leader_address')
                msg = 'Etcd service did not start. Will retry soon.'
                etcdctl.unregister(member['unit_id'], leader_address)
            except CalledProcessError:
                log('Notice: Unit failed to unregister', 'WARNING')

        # we cannot register. State not attainable.
        status_set('waiting', msg)
        return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])

            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']
        else:
            log('etcdctl.register failed, will retry')
            msg = 'Waiting to retry etcd registration'
            status_set('waiting', msg)
            return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    # Give the freshly restarted service a moment before probing it.
    time.sleep(2)

    # Check health status before we say we are good.
    # cluster_health() is indexed with ['status'] by check_cluster_health in
    # this file, so inspect that text. Testing `in` against the returned
    # mapping itself only looks at its keys and never matches.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return

    open_port(bag.port)
    set_state('etcd.registered')
def check_cluster_health():
    ''' Surface the etcd cluster-health status text as the unit status.

    The original docstring states this runs every 5 minutes; the scheduling
    itself is not visible in this block. The unit status is always set to
    'active', with the 'status' entry of the health report as its message.
    '''
    report = EtcdCtl().cluster_health()
    status_set('active', report['status'])