def check_cluster_health():
    """Publish this unit's etcd health and peer count as its workload status.

    The original note says this runs every 5 minutes; the scheduling itself
    is not part of this function. The unit is marked blocked when the
    cluster status text contains "unhealthy" or when the member list cannot
    be read, and active otherwise.
    """
    client = EtcdCtl()
    report = client.cluster_health()

    # Anything that does not explicitly say "unhealthy" counts as healthy.
    unit_health = "UnHealthy" if "unhealthy" in report["status"] else "Healthy"

    # Count the known peers; a failed lookup surfaces as 0 peers, "Errored".
    try:
        peers = len(client.member_list())
    except Exception:
        unit_health = "Errored"
        peers = 0

    plural = "" if peers == 1 else "s"
    status_message = "{0} with {1} known peer{2}".format(
        unit_health, peers, plural
    )

    # unit_health is one of "Healthy", "UnHealthy" or "Errored".
    if unit_health == "Healthy":
        status.active(status_message)
    else:
        status.blocked(status_message)
def initialize_new_leader():
    """Bootstrap a single-member etcd cluster on the leader unit.

    Renders the configuration for a new cluster made of this unit only,
    restarts the etcd daemon and checks cluster health. When healthy, the
    client port is opened, the leader address and cluster string are
    published via leader_set() for followers, and the ``etcd.registered``
    and ``etcd.leader.configured`` states are set. When unhealthy, the unit
    is marked blocked and nothing is published.
    """
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept as in the original; presumably it
    # pins the token value on the databag instance -- confirm in EtcdDatabag.
    bag.token = bag.token
    bag.set_cluster_state("new")
    address = get_ingress_address("cluster")
    cluster_connection_string = get_connection_string(
        [address], bag.management_port
    )
    bag.set_cluster("{}={}".format(bag.unit_name, cluster_connection_string))

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    # The result is bound to ``health`` (not ``status``) so the module-level
    # ``status`` helper stays reachable, and the textual status line is
    # inspected: ``in`` on the returned mapping would only test its keys.
    health = EtcdCtl().cluster_health()
    if "unhealthy" in health["status"]:
        status.blocked("Cluster not healthy.")
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set(
        {"leader_address": leader_connection_string, "cluster": bag.cluster}
    )

    # set registered state since if we ever become a follower, we will not
    # need to re-register
    set_state("etcd.registered")

    # finish bootstrap delta and set configured state
    set_state("etcd.leader.configured")
def initialize_new_leader():
    ''' Create an initial cluster string to bring up a single member cluster
    of etcd, and set the leadership data so the followers can join this one.

    Renders /etc/default/etcd from the databag, restarts the etcd service and
    verifies cluster health. On an unhealthy cluster the unit is set to
    blocked and no leadership data is published.
    '''
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept as in the original; presumably it
    # pins the token value on the databag instance -- confirm in EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    cluster_connection_string = get_connection_string(
        [bag.private_address], bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)

    render('defaults', '/etc/default/etcd', bag.__dict__,
           owner='root', group='root')
    host.service_restart('etcd')

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good. cluster_health() hands
    # back a mapping, so look at its 'status' text; testing the mapping
    # itself would only compare against its keys and never match.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string(
        [bag.private_address], bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
def leader_config_changed():
    ''' Apply a port configuration change from the leader unit.

    Nothing beyond logging happens unless both the client port and the
    management port have a previous value. Otherwise every member's peer URL
    is rewritten to the new management port, the leader's configuration is
    rendered, ports are closed/opened, the leader address is republished and
    the etcd daemon is restarted. '''
    configuration = hookenv.config()
    previous_port = configuration.previous('port')
    log('Previous port: {0}'.format(previous_port))
    previous_mgmt_port = configuration.previous('management_port')
    log('Previous management port: {0}'.format(previous_mgmt_port))

    # Guard clause: without both previous values there is nothing to migrate.
    if not (previous_port and previous_mgmt_port):
        return

    bag = EtcdDatabag()
    etcdctl = EtcdCtl()
    members = etcdctl.member_list()
    for member in members.values():
        # Swap the old management port for the new one in the peer url.
        current_urls = member['peer_urls']
        log('Previous peer url: {0}'.format(current_urls))
        updated_urls = current_urls.replace(
            ':{0}'.format(previous_mgmt_port),
            ':{0}'.format(configuration.get('management_port')))
        # Push the rewritten peer url to the cluster and log the outcome.
        log(etcdctl.member_update(member['unit_id'], updated_urls))

    # Render just the leaders configuration with the new values.
    render_config()
    # Close the previous client port and open the new one.
    close_open_ports()

    # NOTE(review): leader_address is built from the management port here,
    # while initialize_new_leader builds it from bag.port -- confirm which
    # one followers expect.
    address = get_ingress_address('cluster')
    new_leader_address = get_connection_string([address], bag.management_port)
    leader_set({'leader_address': new_leader_address})
    host.service_restart(bag.etcd_daemon)
def leader_config_changed():
    ''' Apply a port configuration change from the leader unit.

    Nothing beyond logging happens unless both the client port and the
    management port have a previous value. Otherwise every member's peer URL
    is rewritten to the new management port, /etc/default/etcd is rendered,
    ports are closed/opened, the leader address is republished and the etcd
    service is restarted. '''
    configuration = hookenv.config()
    previous_port = configuration.previous('port')
    log('Previous port: {0}'.format(previous_port))
    previous_mgmt_port = configuration.previous('management_port')
    log('Previous management port: {0}'.format(previous_mgmt_port))

    # Guard clause: without both previous values there is nothing to migrate.
    if not (previous_port and previous_mgmt_port):
        return

    bag = EtcdDatabag()
    etcdctl = EtcdCtl()
    members = etcdctl.member_list()
    for member in members.values():
        # Swap the old management port for the new one in the peer url.
        current_urls = member['peer_urls']
        log('Previous peer url: {0}'.format(current_urls))
        updated_urls = current_urls.replace(
            ':{0}'.format(previous_mgmt_port),
            ':{0}'.format(configuration.get('management_port')))
        # Push the rewritten peer url to the cluster and log the outcome.
        log(etcdctl.member_update(member['unit_id'], updated_urls))

    # Render just the leaders configuration with the new values.
    render('defaults', '/etc/default/etcd', bag.__dict__,
           owner='root', group='root')
    # Close the previous client port and open the new one.
    close_open_ports()

    # NOTE(review): leader_address is built from the management port here,
    # while initialize_new_leader builds it from bag.port -- confirm which
    # one followers expect.
    new_leader_address = get_connection_string(
        [bag.private_address], bag.management_port)
    leader_set({'leader_address': new_leader_address})
    host.service_restart('etcd')
def initialize_new_leader():
    ''' Create an initial cluster string to bring up a single member cluster
    of etcd, and set the leadership data so the followers can join this one.

    Renders the etcd configuration from the databag, restarts the daemon and
    verifies cluster health. On an unhealthy cluster the unit is set to
    blocked and no leadership data is published.
    '''
    bag = EtcdDatabag()
    # NOTE(review): self-assignment kept as in the original; presumably it
    # pins the token value on the databag instance -- confirm in EtcdDatabag.
    bag.token = bag.token
    bag.cluster_state = 'new'
    address = get_ingress_address('cluster')
    cluster_connection_string = get_connection_string(
        [address], bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good. cluster_health() hands
    # back a mapping, so look at its 'status' text; testing the mapping
    # itself would only compare against its keys and never match.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')
def perform_self_unregistration(cluster=None):
    """Remove this unit's own member entry from the cluster on teardown.

    ``cluster`` is accepted but not used by the body.
    """
    client = EtcdCtl()
    leader_address = leader_get("leader_address")
    # The member list is indexed by the Juju unit name with the "/" removed.
    own_key = os.getenv("JUJU_UNIT_NAME").replace("/", "")
    own_entry = client.member_list()[own_key]
    # Self Unregistration, performed against the leader's address.
    client.unregister(own_entry["unit_id"], leader_address)
def perform_self_unregistration(cluster=None):
    ''' Unregister this unit from the etcd cluster during unit teardown.

    The ``cluster`` argument is not read by this function. '''
    ctl = EtcdCtl()
    leader_address = leader_get('leader_address')
    # Look ourselves up by the unit name with the slash stripped.
    member_key = os.getenv('JUJU_UNIT_NAME').replace('/', '')
    known_members = ctl.member_list()
    own_unit_id = known_members[member_key]['unit_id']
    # Self Unregistration
    ctl.unregister(own_unit_id, leader_address)
def unregister(cluster):
    ''' Unregister the peers reported by the cluster relation.

    Each unit returned by cluster.get_peers() is looked up in the etcd member
    list (unit name with the slash removed). Known members are unregistered;
    unknown ones are only logged. The relation event is dismissed at the
    end. '''
    etcdctl = EtcdCtl()
    reported_units = cluster.get_peers()
    members = etcdctl.member_list()

    for unit in reported_units:
        member_key = unit.replace('/', '')
        if member_key not in members:
            # Nothing to remove: the unit never made it into the cluster.
            log("Received removal for disconnected member {}".format(unit))
            continue
        log("Unregistering {0}".format(unit))
        etcdctl.unregister(members[member_key]['unit_id'])

    cluster.dismiss()
def send_single_connection_details(db):
    ''' Publish client credentials and a connection string that contains only
    this unit's private address on the db relation. '''
    cert = read_tls_cert('client.crt')
    key = read_tls_cert('client.key')
    ca = read_tls_cert('ca.crt')
    etcdctl = EtcdCtl()

    # Hand the client key, certificate and CA to the db relation.
    db.set_client_credentials(key, cert, ca)

    bag = EtcdDatabag()
    # Only this member is advertised, on the configured client port.
    connection_string = get_connection_string([bag.private_address], bag.port)

    # Publish the connection string together with the etcdctl version.
    db.set_connection_string(connection_string, version=etcdctl.version())
def check_cluster_health():
    ''' Report the cluster health and known peer count as the unit status.

    The original note says this runs every 5 minutes; the scheduling itself
    is not part of this function. The status is always set with the 'active'
    workload state; only the message reflects health. '''
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()

    # Determine if the unit is healthy or unhealthy. The word "unhealthy"
    # contains "healthy", so a plain substring test for 'healthy' would
    # report an unhealthy cluster as healthy; rule out the negative form.
    status_text = health['status']
    if 'healthy' in status_text and 'unhealthy' not in status_text:
        unit_health = "Healthy"
    else:
        unit_health = "Unhealthy"

    # Determine units peer count, and surface 0 by default
    try:
        peers = len(etcdctl.member_list())
    except Exception:
        peers = 0

    status_message = "{0} with {1} known peers.".format(unit_health, peers)
    status_set('active', status_message)
def dismantle_cluster():
    """Unregister every cluster member except this unit.

    Described by the original as a preparation step before restoring a
    snapshot. After the other members are removed, the cluster state is set
    to 'new' and etcd.conf.yml is rendered from the etcd3.conf template.
    """
    log('Disconnecting cluster members')
    client = EtcdCtl()
    databag = EtcdDatabag()
    own_name = databag.unit_name
    endpoint = 'https://{}:{}'.format(databag.cluster_address, databag.port)

    for name, data in client.member_list(endpoint).items():
        if name == own_name:
            # Keep ourselves registered.
            continue
        log('Disconnecting {}'.format(name), hookenv.DEBUG)
        client.unregister(data['unit_id'], endpoint)

    databag.cluster_state = 'new'
    conf_path = os.path.join(databag.etcd_conf_dir, "etcd.conf.yml")
    render('etcd3.conf', conf_path, databag.__dict__,
           owner='root', group='root')
def send_cluster_details(proxy):
    ''' Give proxy units the client TLS material and the peer cluster string.

    The cluster string is a comma separated list of "name=peer_urls" entries
    built from the etcd member list. '''
    cert = read_tls_cert('client.crt')
    key = read_tls_cert('client.key')
    ca = read_tls_cert('ca.crt')
    proxy.set_client_credentials(key, cert, ca)

    members = EtcdCtl().member_list()
    # Members without peer urls may still be registering; leave them out.
    cluster = [
        "{}={}".format(member['name'], member['peer_urls'])
        for member in members.values()
        if member.get('peer_urls')
    ]
    proxy.set_cluster_string(','.join(cluster))
def send_cluster_details(proxy):
    ''' Give proxy units the client TLS material (read from leadership data)
    and the peer cluster string.

    The cluster string is a comma separated list of "name=peer_urls" entries
    built from the etcd member list. '''
    cert = leader_get('client_certificate')
    key = leader_get('client_key')
    ca = leader_get('certificate_authority')
    proxy.set_client_credentials(key, cert, ca)

    # format a list of cluster participants
    etcdctl = EtcdCtl()
    entries = []
    for member in etcdctl.member_list().values():
        peer_urls = member.get('peer_urls')
        if peer_urls:
            entries.append("{}={}".format(member['name'], peer_urls))
        # else: potential member doing registration, skipped by default.
    proxy.set_cluster_string(','.join(entries))
def check_cluster_health():
    ''' Report the cluster health and known peer count as the unit status.

    The original note says this runs every 5 minutes; the scheduling itself
    is not part of this function. The status is always set with the 'active'
    workload state; health is only reflected in the message text. '''
    client = EtcdCtl()
    report = client.cluster_health()

    # Anything that does not explicitly say "unhealthy" counts as healthy.
    unit_health = "UnHealthy" if 'unhealthy' in report['status'] else "Healthy"

    # Count the known peers; a failed lookup surfaces as 0 peers, "Errored".
    try:
        peers = len(client.member_list())
    except Exception:
        unit_health = "Errored"
        peers = 0

    plural = '' if peers == 1 else 's'
    status_message = "{0} with {1} known peer{2}".format(
        unit_health, peers, plural)
    status_set('active', status_message)
def send_cluster_connection_details(cluster, db):
    ''' Publish the client TLS material and a connection string covering all
    cluster members on the db relation. '''
    cert = read_tls_cert('client.crt')
    key = read_tls_cert('client.key')
    ca = read_tls_cert('ca.crt')
    etcdctl = EtcdCtl()

    # Hand the key, cert, and ca to the db relation.
    db.set_client_credentials(key, cert, ca)

    port = hookenv.config().get('port')

    # Start from the peers' addresses, then add our own: the peer relation
    # does not report this unit itself.
    members = cluster.get_db_ingress_addresses()
    members.append(get_ingress_address('db'))
    members.sort()

    # One connection string for all members on the configured port,
    # published together with the etcdctl version.
    connection_string = get_connection_string(members, port)
    db.set_connection_string(connection_string, version=etcdctl.version())
def register_node_with_leader(cluster):
    ''' Control flow mechanism to perform self registration with the leader.

    Steps, in order:
    1. If a cluster unit id is already cached, re-render the config with the
       cached registration peer string and restart etcd (retry path).
    2. Read the member list from the leader; if any member has no
       'client_urls' yet, set 'waiting' status and return.
    3. If no unit id is cached, register with the leader and cache the
       returned unit id and cluster string.
    4. Render the config, restart etcd, and verify cluster health. Only a
       healthy cluster leads to the port being opened and the
       'etcd.registered' state being set.

    The ``cluster`` argument is not read by this function. '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render('defaults', '/etc/default/etcd', bag.__dict__)
        host.service_restart('etcd')
        time.sleep(2)

    peers = etcdctl.member_list(leader_get('leader_address'))
    for unit in peers:
        if 'client_urls' not in peers[unit]:
            # we cannot register. State not attainable.
            msg = 'Waiting for unit to complete registration'
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])
            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']

    render('defaults', '/etc/default/etcd', bag.__dict__)
    host.service_restart('etcd')
    time.sleep(2)

    # Check health status before we say we are good. cluster_health() hands
    # back a mapping, so look at its 'status' text; testing the mapping
    # itself would only compare against its keys and never match.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy')
        return

    open_port(bag.port)
    set_state('etcd.registered')
def register_node_with_leader(cluster):
    ''' Register this unit with the etcd cluster through the leader.

    Any existing member whose peer URL equals ours is unregistered first so
    registration starts from scratch. If an etcdctl command fails, the unit
    goes to 'waiting' and returns so a later run can retry. On success the
    config is rendered, etcd is restarted, the client port is opened and the
    'etcd.registered' state is set.

    The ``cluster`` argument is not read by this function. '''
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    leader_address = leader_get('leader_address')
    bag.leader_address = leader_address

    try:
        own_peer_url = 'https://%s:%s' % (bag.cluster_address,
                                          bag.management_port)
        # Drop any stale registration that points at our own peer URL.
        for member in etcdctl.member_list(leader_address).values():
            if member['peer_urls'] != own_peer_url:
                continue
            log('Found member that matches our peer URL. Unregistering...')
            etcdctl.unregister(member['unit_id'], leader_address)

        # Now register.
        registration = etcdctl.register(bag.__dict__)
        bag.cluster = registration['cluster']
    except EtcdCtl.CommandFailed:
        log('etcdctl.register failed, will retry')
        status_set('waiting', 'Waiting to retry etcd registration')
        return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    open_port(bag.port)
    set_state('etcd.registered')
def register_node_with_leader(cluster):
    """Register this unit with the etcd cluster through the leader.

    Any existing member whose peer URL equals ours is unregistered first so
    registration starts from scratch. If an etcdctl command fails, the unit
    goes to waiting status and returns so a later run can retry. On success
    the config is rendered, etcd is restarted, the client port is opened and
    the ``etcd.registered`` state is set.

    The ``cluster`` argument is not read by this function.
    """
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    leader_address = leader_get("leader_address")
    bag.leader_address = leader_address

    try:
        own_peer_url = "https://%s:%s" % (
            bag.cluster_address,
            bag.management_port,
        )
        # Drop any stale registration that points at our own peer URL.
        for member in etcdctl.member_list(leader_address).values():
            if member["peer_urls"] != own_peer_url:
                continue
            log("Found member that matches our peer URL. Unregistering...")
            etcdctl.unregister(member["unit_id"], leader_address)

        # Now register.
        registration = etcdctl.register(bag.__dict__)
        bag.set_cluster(registration["cluster"])
    except EtcdCtl.CommandFailed:
        log("etcdctl.register failed, will retry")
        status.waiting("Waiting to retry etcd registration")
        return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    open_port(bag.port)
    set_state("etcd.registered")
import subprocess import sys from charms import layer from etcdctl import EtcdCtl from charmhelpers.core.hookenv import ( action_get, action_set, action_fail, action_name ) CTL = EtcdCtl() def action_fail_now(*args, **kw): '''Call action_fail() and exit immediately. ''' action_fail(*args, **kw) sys.exit(0) def requires_etcd_version(version_regex, human_version=None): '''Decorator that enforces a specific version of etcdctl be present. The decorated function will only be executed if the required version of etcdctl is present. Otherwise, action_fail() will be called and
def etcdctl(self):
    """Return a newly constructed EtcdCtl instance (one per call)."""
    client = EtcdCtl()
    return client
def check_cluster_health():
    ''' Surface the cluster health status text as the unit's status message.

    The original note says this runs every 5 minutes; the scheduling itself
    is not part of this function. The workload state is always 'active'. '''
    summary = EtcdCtl().cluster_health()['status']
    status_set('active', summary)
def register_node_with_leader(cluster):
    ''' Control flow mechanism to perform self registration with the leader.

    Steps, in order:
    1. If a cluster unit id is already cached, re-render the config with the
       cached registration peer string (retry path).
    2. Read the member list from the leader. A CalledProcessError here is
       treated as transient: it is logged and the function returns.
    3. If any member has no 'client_urls' yet, set 'waiting' status and
       return. When that unstarted member is this unit and the etcd daemon
       is not running, try to unregister it first.
    4. If no unit id is cached, register with the leader and cache the
       returned unit id and cluster string; on failure set 'waiting' and
       return.
    5. Render the config, restart etcd, and verify cluster health. Only a
       healthy cluster leads to the port being opened and the
       'etcd.registered' state being set.

    The ``cluster`` argument is not read by this function. '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # Assume a hiccup during registration and attempt a retry
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render_config(bag)
        time.sleep(2)

    try:
        peers = etcdctl.member_list(leader_get('leader_address'))
    except CalledProcessError:
        log("Etcd attempted to invoke registration before service ready")
        # This error state is transient, and does not imply the unit is
        # broken. Erroring at this stage can be resolved, and should not
        # effect the overall condition of unit turn-up. Return from the
        # method and let the charm re-invoke on next run
        return

    for unit in peers:
        peer = peers[unit]
        if 'client_urls' in peer:
            continue

        msg = 'Waiting for unit to complete registration.'
        if ('peer_urls' in peer and
                peer['peer_urls'] and
                get_ingress_address('cluster') in peer['peer_urls'] and
                not host.service_running(bag.etcd_daemon)):
            # We have a peer that is unstarted and it is this node.
            # We do not run etcd now. Instead of blocking everyone
            # try to self-unregister.
            try:
                leader_address = leader_get('leader_address')
                msg = 'Etcd service did not start. Will retry soon.'
                etcdctl.unregister(peer['unit_id'], leader_address)
            except CalledProcessError:
                log('Notice: Unit failed to unregister', 'WARNING')
        # we cannot register. State not attainable.
        status_set('waiting', msg)
        return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])
            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']
        else:
            log('etcdctl.register failed, will retry')
            msg = 'Waiting to retry etcd registration'
            status_set('waiting', msg)
            return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    time.sleep(2)

    # Check health status before we say we are good. cluster_health() hands
    # back a mapping, so look at its 'status' text; testing the mapping
    # itself would only compare against its keys and never match.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health['status']:
        status_set('blocked', 'Cluster not healthy.')
        return

    open_port(bag.port)
    set_state('etcd.registered')