def initialize_new_leader():
    ''' Bootstrap a single-member etcd cluster on this unit and publish the
    leadership data followers need in order to join it. '''
    databag = EtcdDatabag()
    # Self-assignment kept from the original; presumably it materialises the
    # token in the instance __dict__ used for rendering -- TODO confirm.
    databag.token = databag.token
    databag.cluster_state = 'new'
    peer_endpoint = get_connection_string([databag.private_address],
                                          databag.management_port)
    databag.cluster = "{}={}".format(databag.unit_name, peer_endpoint)
    render('defaults', '/etc/default/etcd', databag.__dict__,
           owner='root', group='root')
    host.service_restart('etcd')

    # The charm can outrun systemd on some hosts, so pause briefly.
    time.sleep(2)

    # Verify the cluster is healthy before advertising this leader.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health:
        status_set('blocked', 'Cluster not healthy')
        return

    # Healthy leader: open the client port and share the join data.
    open_port(databag.port)
    client_endpoint = get_connection_string([databag.private_address],
                                            databag.port)
    leadership_data = {
        'token': databag.token,
        'leader_address': client_endpoint,
        'cluster': databag.cluster,
    }
    leader_set(leadership_data)

    # Bootstrap is complete; record the configured state.
    set_state('etcd.leader.configured')
def initialize_new_leader():
    ''' Bootstrap a single-member etcd cluster on this unit and publish the
    leadership data followers need in order to join it. '''
    databag = EtcdDatabag()
    # Self-assignment kept from the original; presumably it materialises the
    # token in the instance __dict__ used for rendering -- TODO confirm.
    databag.token = databag.token
    databag.cluster_state = 'new'
    ingress = get_ingress_address('cluster')
    peer_endpoint = get_connection_string([ingress], databag.management_port)
    databag.cluster = "{}={}".format(databag.unit_name, peer_endpoint)
    render_config(databag)
    host.service_restart(databag.etcd_daemon)

    # The charm can outrun systemd on some hosts, so pause briefly.
    time.sleep(2)

    # Verify the cluster is healthy before advertising this leader.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health:
        status_set('blocked', 'Cluster not healthy.')
        return

    # Healthy leader: open the client port and share the join data.
    open_port(databag.port)
    client_endpoint = get_connection_string([ingress], databag.port)
    leadership_data = {
        'token': databag.token,
        'leader_address': client_endpoint,
        'cluster': databag.cluster,
    }
    leader_set(leadership_data)

    # Bootstrap is complete; record the configured state.
    set_state('etcd.leader.configured')
def register_node_with_leader(cluster):
    '''
    Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of offline
    static turnup rules. If we find a GUID in the member list without peering
    information the unit will enter a race condition and must wait for a clean
    status output before we can progress to self registration.

    :param cluster: the cluster relation object; not used in the body.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # Assume a hiccup during registration and attempt a retry using the
    # peer string cached from the earlier registration.
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render('defaults', '/etc/default/etcd', bag.__dict__)
        host.service_restart('etcd')
        time.sleep(2)

    peers = etcdctl.member_list(leader_get('leader_address'))
    for unit in peers:
        if 'client_urls' not in peers[unit]:
            # A member without client URLs has not finished registering;
            # we cannot register until it does. State not attainable.
            msg = 'Waiting for unit to complete registration'
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp and 'cluster' in resp:
            # Cache the details so a later invocation can take the retry
            # path above instead of registering again.
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])
            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']
        else:
            # Registration failed or returned an incomplete answer. Do not
            # render config and restart etcd without a cluster string; wait
            # for the next invocation instead.
            log('etcdctl.register failed, will retry')
            msg = 'Waiting to retry etcd registration'
            status_set('waiting', msg)
            return

    render('defaults', '/etc/default/etcd', bag.__dict__)
    host.service_restart('etcd')
    time.sleep(2)

    # Check health status before we say we are good
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if 'unhealthy' in health:
        status_set('blocked', 'Cluster not healthy')
        return
    open_port(bag.port)
    set_state('etcd.registered')
def render_config(bag=None):
    """Write the etcd configuration file matching the installed etcd version.

    A fresh EtcdDatabag is used when ``bag`` is not supplied (or is falsy).
    """
    if not bag:
        bag = EtcdDatabag()

    move_etcd_data_to_standard_location()

    legacy_path = "{}/etcd.conf".format(bag.etcd_conf_dir)
    yaml_path = "{}/etcd.conf.yml".format(bag.etcd_conf_dir)

    is_v2 = etcd_version().startswith("2.")
    if is_v2:
        # 2.x uses the flat etcd.conf template.
        render("etcd2.conf", legacy_path, bag.__dict__,
               owner="root", group="root")
    else:
        # Anything else gets the 3.x YAML template.
        render("etcd3.conf", yaml_path, bag.__dict__,
               owner="root", group="root")
        # v3 will fail if the v2 config is left in place.
        if os.path.exists(legacy_path):
            os.remove(legacy_path)

    # Close the previous client port and open the new one.
    close_open_ports()
    remove_state("etcd.rerender-config")
def leader_config_changed():
    ''' Apply a port / management_port change on the leader: update every
    member's peer URL, re-render the config, refresh the open ports, publish
    the new leader address and restart etcd. Nothing is done unless both
    previous port values are known. '''
    cfg = hookenv.config()
    previous_port = cfg.previous('port')
    log('Previous port: {0}'.format(previous_port))
    previous_mgmt_port = cfg.previous('management_port')
    log('Previous management port: {0}'.format(previous_mgmt_port))

    if not (previous_port and previous_mgmt_port):
        return

    bag = EtcdDatabag()
    etcdctl = EtcdCtl()
    members = etcdctl.member_list()
    for member in members.values():
        # Swap the old management port for the new one in the peer URL.
        peer_urls = member['peer_urls']
        log('Previous peer url: {0}'.format(peer_urls))
        stale_suffix = ':{0}'.format(previous_mgmt_port)
        fresh_suffix = ':{0}'.format(cfg.get('management_port'))
        updated_url = peer_urls.replace(stale_suffix, fresh_suffix)
        # Push the updated peer URL to the cluster and log the outcome.
        log(etcdctl.member_update(member['unit_id'], updated_url))

    # Only the leader's own configuration is rendered here.
    render_config()
    # Close the previous client port and open the new one.
    close_open_ports()
    ingress = get_ingress_address('cluster')
    leader_address = get_connection_string([ingress], bag.management_port)
    leader_set({'leader_address': leader_address})
    host.service_restart(bag.etcd_daemon)
def move_etcd_data_to_standard_location():
    ''' Relocate etcd's data into the databag's data directory when the
    existing etcd.conf.yml points somewhere else, then rewrite that config.
    Needed when regenerating config after an upgrade from etcd 2.3 to 3.x.
    '''
    databag = EtcdDatabag()
    conf_path = databag.etcd_conf_dir + '/etcd.conf.yml'
    if not os.path.exists(conf_path):
        return

    with open(conf_path) as conf_file:
        conf = yaml.safe_load(conf_file)

    current_dir = conf['data-dir']
    target_dir = databag.etcd_data_dir
    if current_dir == target_dir:
        # Data already lives in the desired location.
        return

    log('Moving etcd data from %s to %s' % (current_dir, target_dir))
    host.service_stop('snap.etcd.etcd')
    for entry in os.listdir(current_dir):
        os.rename('/'.join((current_dir, entry)),
                  '/'.join((target_dir, entry)))
    os.rmdir(current_dir)

    # Persist the new location before bringing the service back.
    conf['data-dir'] = target_dir
    with open(conf_path, 'w') as conf_file:
        yaml.dump(conf, conf_file)
    host.service_start('snap.etcd.etcd')
def rerender_config():
    """Re-render the etcd config, restart the daemon if it is running, and
    refresh the application version."""
    databag = EtcdDatabag()
    log("Rendering config file for {0}".format(databag.unit_name))
    render_config()

    daemon = databag.etcd_daemon
    # A stopped service is left stopped; only a running one is restarted.
    if host.service_running(daemon):
        host.service_restart(daemon)
    set_app_version()
def initialize_new_leader():
    """Create an initial cluster string to bring up a single member cluster of
    etcd, and set the leadership data so the followers can join this one.

    On an unhealthy cluster the unit is put into blocked status and no
    leadership data or states are set.
    """
    bag = EtcdDatabag()
    # Self-assignment kept from the original; presumably it materialises the
    # token on the instance -- TODO confirm.
    bag.token = bag.token
    bag.set_cluster_state("new")
    address = get_ingress_address("cluster")
    cluster_connection_string = get_connection_string([address], bag.management_port)
    bag.set_cluster("{}={}".format(bag.unit_name, cluster_connection_string))

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    # The result must not be named ``status``: that would shadow the status
    # helper module used below, making ``status.blocked`` an attribute lookup
    # on the health result.
    etcdctl = EtcdCtl()
    health = etcdctl.cluster_health()
    if "unhealthy" in health:
        status.blocked("Cluster not healthy.")
        return

    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({"leader_address": leader_connection_string, "cluster": bag.cluster})

    # set registered state since if we ever become a follower, we will not need
    # to re-register
    set_state("etcd.registered")

    # finish bootstrap delta and set configured state
    set_state("etcd.leader.configured")
def follower_config_changed():
    ''' On a follower, re-render the etcd configuration and refresh the
    open ports. '''
    unit_name = EtcdDatabag().unit_name
    log('Rendering defaults file for {0}'.format(unit_name))
    # Write this follower's configuration with the current values.
    render_config()
    # Swap the previously opened client port for the new one.
    close_open_ports()
def follower_config_changed():
    ''' On a follower, re-render the etcd defaults file, refresh the open
    ports and restart the etcd service. '''
    databag = EtcdDatabag()
    log('Rendering defaults file for {0}'.format(databag.unit_name))
    # Write this follower's defaults file with the current values.
    render('defaults', '/etc/default/etcd', databag.__dict__,
           owner='root', group='root')
    # Swap the previously opened client port for the new one.
    close_open_ports()
    host.service_restart('etcd')
def test_force_rejoin(self, sleep, path_join, rmtree, path_exists, register_node):
    """Check that force_rejoin stops etcd, clears the registered flag, wipes
    the data directory and re-registers with the leader."""
    data_dir = "/foo/bar"
    path_join.return_value = data_dir
    path_exists.return_value = True

    force_rejoin()

    expected_daemon = EtcdDatabag().etcd_daemon
    host.service_stop.assert_called_with(expected_daemon)
    clear_flag.assert_called_with("etcd.registered")
    rmtree.assert_called_with(data_dir)
    register_node.assert_called()
def dismantle_cluster():
    """Unregister every cluster member except this unit.

    This is a preparation step before restoring a snapshot on the cluster.
    Afterwards the local etcd3 config is re-rendered with cluster state 'new'.
    """
    log('Disconnecting cluster members')
    etcdctl = EtcdCtl()
    databag = EtcdDatabag()

    own_name = databag.unit_name
    endpoint = 'https://{}:{}'.format(databag.cluster_address, databag.port)
    members = etcdctl.member_list(endpoint)
    for member_name, member in members.items():
        if member_name == own_name:
            # Never disconnect ourselves.
            continue
        log('Disconnecting {}'.format(member_name), hookenv.DEBUG)
        etcdctl.unregister(member['unit_id'], endpoint)

    databag.cluster_state = 'new'
    conf_path = os.path.join(databag.etcd_conf_dir, "etcd.conf.yml")
    render('etcd3.conf', conf_path, databag.__dict__,
           owner='root', group='root')
def force_rejoin():
    """Wipe local member data and rejoin the cluster formed by the leader.

    Required after the leader unit performed a snapshot restore: every other
    member must drop its local data and previous cluster identity and join
    the newly formed, restored, cluster. Registration is attempted up to 11
    times.
    """
    log("Wiping local storage and rejoining cluster")
    databag = EtcdDatabag()
    host.service_stop(databag.etcd_daemon)
    clear_flag("etcd.registered")

    member_dir = os.path.join(databag.storage_path(), "member")
    if os.path.exists(member_dir):
        shutil.rmtree(member_dir)

    max_attempts = 11
    for _attempt in range(max_attempts):
        # Randomised back-off: only one unit can be joining at a time.
        time.sleep(random.randint(1, 10))
        register_node_with_leader(None)
        if is_flag_set("etcd.registered"):
            log("Successfully rejoined the cluster")
            return
def tls_state_control():
    ''' Collapse the TLS certificate handling into one gating state.

    Once the etcd conf directory exists, its ownership is changed recursively
    to root:ubuntu so users of the system can access the TLS certificates,
    and 'etcd.ssl.placed' is set. '''
    conf_dir = EtcdDatabag().etcd_conf_dir
    if os.path.isdir(conf_dir):
        check_call(['chown', '-R', 'root:ubuntu', conf_dir])
        set_state('etcd.ssl.placed')
    else:
        hookenv.log('Waiting for etcd conf creation.')
def tls_update():
    """React to changed TLS data: re-render the config, restart etcd and
    clear the TLS-related states."""
    databag = EtcdDatabag()
    # Pick up the new certs in the config and restart the service.
    render_config(databag)
    host.service_restart(databag.etcd_daemon)

    # Dropping these states ensures the certs are re-echoed to the db
    # relations.
    stale_states = (
        "etcd.ssl.placed",
        "tls_client.ca.written",
        "tls_client.server.certificate.written",
        "tls_client.client.certificate.written",
    )
    for state in stale_states:
        remove_state(state)
def register_node_with_leader(cluster):
    """
    Register this unit with the leader's etcd cluster.

    Any existing member whose peer URL matches ours is unregistered first so
    registration starts from scratch. When an etcdctl command fails the unit
    goes into waiting status and returns without setting 'etcd.registered'.
    """
    etcdctl = EtcdCtl()
    databag = EtcdDatabag()
    leader_address = leader_get("leader_address")
    databag.leader_address = leader_address

    try:
        # Drop any stale registration that carries our peer URL.
        own_peer_url = "https://%s:%s" % (databag.cluster_address,
                                          databag.management_port)
        current_members = etcdctl.member_list(leader_address)
        for member in current_members.values():
            if member["peer_urls"] != own_peer_url:
                continue
            log("Found member that matches our peer URL. Unregistering...")
            etcdctl.unregister(member["unit_id"], leader_address)

        # Register afresh and adopt the cluster string we are given.
        response = etcdctl.register(databag.__dict__)
        databag.set_cluster(response["cluster"])
    except EtcdCtl.CommandFailed:
        log("etcdctl.register failed, will retry")
        status.waiting("Waiting to retry etcd registration")
        return

    render_config(databag)
    host.service_restart(databag.etcd_daemon)
    open_port(databag.port)
    set_state("etcd.registered")
def register_node_with_leader(cluster):
    '''
    Register this unit with the leader's etcd cluster.

    Any existing member whose peer URL matches ours is unregistered first so
    registration starts from scratch. When an etcdctl command fails the unit
    goes into waiting status and returns without setting 'etcd.registered'.
    '''
    etcdctl = EtcdCtl()
    databag = EtcdDatabag()
    leader_address = leader_get('leader_address')
    databag.leader_address = leader_address

    try:
        # Drop any stale registration that carries our peer URL.
        own_peer_url = 'https://%s:%s' % (databag.cluster_address,
                                          databag.management_port)
        current_members = etcdctl.member_list(leader_address)
        for member in current_members.values():
            if member['peer_urls'] != own_peer_url:
                continue
            log('Found member that matches our peer URL. Unregistering...')
            etcdctl.unregister(member['unit_id'], leader_address)

        # Register afresh and adopt the cluster string we are given.
        response = etcdctl.register(databag.__dict__)
        databag.cluster = response['cluster']
    except EtcdCtl.CommandFailed:
        log('etcdctl.register failed, will retry')
        status_set('waiting', 'Waiting to retry etcd registration')
        return

    render_config(databag)
    host.service_restart(databag.etcd_daemon)
    open_port(databag.port)
    set_state('etcd.registered')
def send_single_connection_details(db):
    ''' Publish the client credentials and a connection string containing
    only this unit's address on the db relation. '''
    client_cert = leader_get('client_certificate')
    client_key = leader_get('client_key')
    authority = leader_get('certificate_authority')
    # Hand the key, cert and CA to the db relation.
    db.set_client_credentials(client_key, client_cert, authority)

    databag = EtcdDatabag()
    # Only this unit is listed; the string uses the configured client port.
    single_member = [databag.private_address]
    db.set_connection_string(
        get_connection_string(single_member, databag.port))
def render_config(bag=None):
    ''' Write the etcd configuration file matching the installed etcd
    version. A fresh EtcdDatabag is used when ``bag`` is falsy. '''
    if not bag:
        bag = EtcdDatabag()

    # 2.x gets the flat config; everything else defaults to the 3.x YAML.
    if etcd_version().startswith('2.'):
        template, path_format = 'etcd2.conf', "{}/etcd.conf"
    else:
        template, path_format = 'etcd3.conf', "{}/etcd.conf.yml"

    conf_path = path_format.format(bag.etcd_conf_dir)
    render(template, conf_path, bag.__dict__, owner='root', group='root')
def send_single_connection_details(db):
    ''' Publish the client credentials, a connection string containing only
    this unit's address, and the etcdctl version on the db relation. '''
    client_cert = read_tls_cert('client.crt')
    client_key = read_tls_cert('client.key')
    authority = read_tls_cert('ca.crt')

    etcdctl = EtcdCtl()

    # Hand the key, cert and CA to the db relation.
    db.set_client_credentials(client_key, client_cert, authority)

    databag = EtcdDatabag()
    # Only this unit is listed; the string uses the configured client port.
    single_member = [databag.private_address]
    endpoints = get_connection_string(single_member, databag.port)
    db.set_connection_string(endpoints, version=etcdctl.version())
def send_cluster_connection_details(cluster, db):
    ''' Publish the client credentials and a connection string covering all
    cluster peers plus this unit on the db relation. '''
    client_cert = read_tls_cert('client.crt')
    client_key = read_tls_cert('client.key')
    authority = read_tls_cert('ca.crt')

    etcdctl = EtcdCtl()
    databag = EtcdDatabag()

    # Hand the key, cert and CA to the db relation.
    db.set_client_credentials(client_key, client_cert, authority)

    client_port = hookenv.config().get('port')
    # The peer list does not include this unit, so add our own address.
    addresses = cluster.get_peer_addresses()
    addresses.append(databag.private_address)
    addresses.sort()

    # One connection string for every member on the configured port.
    endpoints = get_connection_string(addresses, client_port)
    db.set_connection_string(endpoints, version=etcdctl.version())
def bind_to_all_interfaces_changed():
    ''' Re-render the etcd configuration and restart the daemon. '''
    databag = EtcdDatabag()
    message = 'Rendering config file for {0}'.format(databag.unit_name)
    log(message)
    render_config()
    host.service_restart(databag.etcd_daemon)
def register_node_with_leader(cluster):
    '''
    Drive this unit's self registration with the leader.

    Offline static turn-up rules apply: while any member in the list lacks
    client URLs, registration must wait. If that half-registered member is
    this unit and etcd is not running locally, the unit tries to unregister
    itself so it does not block everyone else.
    '''
    # We talk to the leader and need our bootstrap startup string once.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()

    # A cached unit id means an earlier registration hiccuped: retry with
    # the cached peer string.
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render_config(bag)
        time.sleep(2)

    try:
        peers = etcdctl.member_list(leader_get('leader_address'))
    except CalledProcessError:
        log("Etcd attempted to invoke registration before service ready")
        # Transient condition, not a broken unit: return and let the charm
        # re-invoke this handler on the next run.
        return

    for unit in peers:
        peer = peers[unit]
        if 'client_urls' in peer:
            continue

        msg = 'Waiting for unit to complete registration.'
        stalled_member_is_us = (
            'peer_urls' in peer and
            peer['peer_urls'] and
            get_ingress_address('cluster') in peer['peer_urls'] and
            not host.service_running(bag.etcd_daemon)
        )
        if stalled_member_is_us:
            # The unstarted peer is this node and etcd is not running here.
            # Rather than block everyone, try to self-unregister.
            try:
                leader_address = leader_get('leader_address')
                msg = 'Etcd service did not start. Will retry soon.'
                etcdctl.unregister(peer['unit_id'], leader_address)
            except CalledProcessError:
                log('Notice: Unit failed to unregister', 'WARNING')
        # we cannot register. State not attainable.
        status_set('waiting', msg)
        return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        resp = etcdctl.register(bag.__dict__)
        registered = resp and 'cluster_unit_id' in resp and 'cluster' in resp
        if not registered:
            log('etcdctl.register failed, will retry')
            status_set('waiting', 'Waiting to retry etcd registration')
            return
        # Cache the details so a later run can take the retry path above.
        bag.cache_registration_detail('cluster_unit_id',
                                      resp['cluster_unit_id'])
        bag.cache_registration_detail('registration_peer_string',
                                      resp['cluster'])
        bag.cluster_unit_id = resp['cluster_unit_id']
        bag.cluster = resp['cluster']

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    time.sleep(2)

    # Check health status before we say we are good
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health:
        status_set('blocked', 'Cluster not healthy.')
        return
    open_port(bag.port)
    set_state('etcd.registered')
def post_series_upgrade():
    """Resume the etcd daemon."""
    daemon = EtcdDatabag().etcd_daemon
    host.service_resume(daemon)
def format_and_mount_storage():
    ''' This allows users to request persistent volumes from the cloud provider
    for the purposes of disaster recovery.

    Formats the attached block device as ext4, mounts it one level above the
    etcd data directory, migrates any existing data with rsync, records the
    mount in /etc/fstab, then re-renders the config and restarts etcd.
    Returns early, without touching the device, if it is already mounted.
    '''
    set_state('data.volume.attached')
    # Query juju for the information about the block storage
    device_info = storage_get()
    block = device_info['location']
    bag = EtcdDatabag()
    bag.cluster = leader_get('cluster')
    # the databag has behavior that keeps the path updated.
    # Reference the default path from layer_options.
    etcd_opts = layer.options('etcd')
    # Split the tail of the path to mount the volume 1 level before
    # the data directory.
    tail = os.path.split(bag.etcd_data_dir)[0]

    if volume_is_mounted(block):
        hookenv.log('Device is already attached to the system.')
        hookenv.log('Refusing to take action against {}'.format(block))
        return

    # Format the device in non-interactive mode
    cmd = ['mkfs.ext4', block, '-F']
    hookenv.log('Creating filesystem on {}'.format(block))
    hookenv.log('With command: {}'.format(' '.join(cmd)))
    check_call(cmd)

    # halt etcd to perform the data-store migration
    host.service_stop(bag.etcd_daemon)

    os.makedirs(tail, exist_ok=True)
    mount_volume(block, tail)
    # handle first run during early-attach storage, pre-config-changed hook.
    os.makedirs(bag.etcd_data_dir, exist_ok=True)

    # Only attempt migration if directory exists
    if os.path.isdir(etcd_opts['etcd_data_dir']):
        migrate_path = "{}/".format(etcd_opts['etcd_data_dir'])
        output_path = "{}/".format(bag.etcd_data_dir)
        cmd = ['rsync', '-azp', migrate_path, output_path]

        hookenv.log('Detected existing data, migrating to new location.')
        hookenv.log('With command: {}'.format(' '.join(cmd)))

        check_call(cmd)

    with open('/etc/fstab', 'r') as fp:
        contents = fp.readlines()

    # if device not in fstab, append so it persists through reboots
    if not any(block in line for line in contents):
        entry = "{0} {1} ext4 defaults 0 0\n".format(block, tail)
        # Each fstab entry must sit on its own line: terminate ours with a
        # newline, and start a new line first if the file does not end in one.
        if contents and not contents[-1].endswith('\n'):
            entry = '\n' + entry
        with open('/etc/fstab', 'a') as fp:
            fp.write(entry)

    # Finally re-render the configuration and resume operation
    render_config(bag)
    host.service_restart(bag.etcd_daemon)
def pre_series_upgrade():
    """Pause the etcd daemon and mark the unit blocked for the upgrade."""
    daemon = EtcdDatabag().etcd_daemon
    host.service_pause(daemon)
    status.blocked("Series upgrade in progress")