def remove_metadata():
    if is_leader() and leader_get("metadata-provisioned"):
        # impossible to know if current hook is firing because
        # relation or leader is being removed lp #1469731
        if not relation_ids("cluster"):
            unprovision_metadata()
        leader_set({"metadata-provisioned": ""})

def maintain_seeds():
    '''The leader needs to maintain the list of seed nodes'''
    seed_ips = helpers.get_seed_ips()
    hookenv.log('Current seeds == {!r}'.format(seed_ips), DEBUG)

    bootstrapped_ips = helpers.get_bootstrapped_ips()
    hookenv.log('Bootstrapped == {!r}'.format(bootstrapped_ips), DEBUG)

    # Remove any seeds that are no longer bootstrapped, such as dropped
    # units.
    seed_ips.intersection_update(bootstrapped_ips)

    # Add more bootstrapped nodes, if necessary, to get to our maximum
    # of 3 seeds.
    potential_seed_ips = list(reversed(sorted(bootstrapped_ips)))
    while len(seed_ips) < 3 and potential_seed_ips:
        seed_ips.add(potential_seed_ips.pop())

    # If there are no seeds or bootstrapped nodes, start with the leader. Us.
    if len(seed_ips) == 0:
        seed_ips.add(hookenv.unit_private_ip())

    hookenv.log('Updated seeds == {!r}'.format(seed_ips), DEBUG)
    hookenv.leader_set(seeds=','.join(sorted(seed_ips)))

def upgrade_charm():
    apt_install(determine_packages(), fatal=True)
    if remove_old_packages():
        log("Package purge detected, restarting services", "INFO")
        for s in services():
            service_restart(s)
    if is_leader():
        # if we are upgrading, then the old version might have used the
        # HEAT_PATH/encryption-key. So we grab the key from that, and put
        # it in leader settings to ensure that the key remains the same
        # during an upgrade.
        encryption_path = os.path.join(HEAT_PATH, 'encryption-key')
        if os.path.isfile(encryption_path):
            with open(encryption_path, 'r') as f:
                encryption_key = f.read()
            try:
                leader_set({'heat-auth-encryption-key': encryption_key})
            except subprocess.CalledProcessError as e:
                log("upgrade: leader_set: heat-auth-encryption-key failed,"
                    " didn't delete the existing file: {}.\n"
                    "Error was: {}".format(encryption_path, str(e)),
                    level=WARNING)
            else:
                # now we just delete the file
                os.remove(encryption_path)
    leader_elected()

def master_joined(interface='master'):
    cluster_id = get_cluster_id()
    if not is_clustered():
        log("Not clustered yet", level=DEBUG)
        return
    relation_settings = {}
    leader_settings = leader_get()
    if is_leader():
        if not leader_settings.get('async-rep-password'):
            # Replication password cannot be longer than 32 characters
            leader_set({'async-rep-password': pwgen(32)})
            return
        configure_master()
        master_address, master_file, master_position = (
            get_master_status(interface))
        if leader_settings.get('master-address') != master_address:
            leader_settings['master-address'] = master_address
            leader_settings['master-file'] = master_file
            leader_settings['master-position'] = master_position
        leader_set(leader_settings)
        relation_settings = {'leader': True}
    else:
        relation_settings = {'leader': False}
    relation_settings['cluster_id'] = cluster_id
    relation_settings['master_address'] = leader_settings['master-address']
    relation_settings['master_file'] = leader_settings['master-file']
    relation_settings['master_password'] = \
        leader_settings['async-rep-password']
    relation_settings['master_position'] = leader_settings['master-position']
    log("Setting master relation: '{}'".format(relation_settings), level=INFO)
    for rid in relation_ids(interface):
        relation_set(relation_id=rid, relation_settings=relation_settings)

def create_initial_servers_and_domains(cls):
    """Create the nameserver entry and domains based on the charm user
    supplied config

    NOTE(AJK): This only wants to be done ONCE and by the leader, so we use
    leader settings to store that we've done it, after it's successfully
    completed.

    @returns None
    """
    KEY = 'create_initial_servers_and_domains'
    if hookenv.is_leader() and not hookenv.leader_get(KEY):
        nova_domain_name = hookenv.config('nova-domain')
        neutron_domain_name = hookenv.config('neutron-domain')
        with cls.check_zone_ids(nova_domain_name, neutron_domain_name):
            if hookenv.config('nameservers'):
                for ns in hookenv.config('nameservers').split():
                    cls.create_server(ns)
            else:
                hookenv.log('No nameserver specified, skipping creation of '
                            'nova and neutron domains',
                            level=hookenv.WARNING)
                return
            if nova_domain_name:
                cls.create_domain(
                    nova_domain_name,
                    hookenv.config('nova-domain-email'))
            if neutron_domain_name:
                cls.create_domain(
                    neutron_domain_name,
                    hookenv.config('neutron-domain-email'))
        # if this fails, we weren't the leader any more; another unit may
        # attempt to do this too.
        hookenv.leader_set({KEY: 'done'})

def configure_floating_ip_pools():
    if is_leader():
        floating_pools = config.get("floating-ip-pools")
        previous_floating_pools = leader_get("floating-ip-pools")
        if floating_pools != previous_floating_pools:
            # create/destroy pools, activate/deactivate projects
            # according to new value
            pools = {(pool["project"], pool["network"], pool["pool-name"]):
                     set(pool["target-projects"])
                     for pool in yaml.safe_load(floating_pools)} \
                if floating_pools else {}
            previous_pools = {}
            if previous_floating_pools:
                for pool in yaml.safe_load(previous_floating_pools):
                    projects = pool["target-projects"]
                    name = (pool["project"], pool["network"],
                            pool["pool-name"])
                    if name in pools:
                        previous_pools[name] = set(projects)
                    else:
                        floating_ip_pool_delete(name, projects)
            for name, projects in pools.items():
                if name not in previous_pools:
                    floating_ip_pool_create(name, projects)
                else:
                    floating_ip_pool_update(name, projects,
                                            previous_pools[name])
            leader_set({"floating-ip-pools": floating_pools})

def leader_set(settings=None, **kw):
    """Change leadership settings, per charmhelpers.core.hookenv.leader_set.

    The leadership.set.{key} reactive state will be set while the
    leadership hook environment setting remains set.

    Changed leadership settings will set the leadership.changed.{key}
    and leadership.changed states. These states will remain set until
    the following hook.

    These state changes take effect immediately on the leader, and
    in future hooks run on non-leaders. In this way both leaders and
    non-leaders can share handlers, waiting on these states.
    """
    settings = settings or {}
    settings.update(kw)
    previous = unitdata.kv().getrange("leadership.settings.", strip=True)

    for key, value in settings.items():
        if value != previous.get(key):
            reactive.set_state("leadership.changed.{}".format(key))
            reactive.set_state("leadership.changed")
        reactive.helpers.toggle_state("leadership.set.{}".format(key),
                                      value is not None)

    hookenv.leader_set(settings)
    unitdata.kv().update(settings, prefix="leadership.settings.")

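# A minimal usage sketch (assuming the charms.reactive framework) of handlers
# consuming the reactive states managed by the leader_set() wrapper above.
# The 'db-master' key name is illustrative, not taken from any charm.
from charms.reactive import when


@when('leadership.set.db-master')
def use_db_master():
    # Fires on leaders and non-leaders whenever the setting has a value.
    master = hookenv.leader_get('db-master')
    hookenv.log('db-master is {}'.format(master))


@when('leadership.changed.db-master')
def db_master_changed():
    # Fires only in hooks run after the setting actually changed.
    hookenv.log('db-master changed')
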
def leader_config_changed():
    '''The leader executes the runtime configuration update for the cluster,
    as it is the controlling unit. Will render config, close and open ports
    and restart the etcd service.'''
    configuration = hookenv.config()
    previous_port = configuration.previous('port')
    log('Previous port: {0}'.format(previous_port))
    previous_mgmt_port = configuration.previous('management_port')
    log('Previous management port: {0}'.format(previous_mgmt_port))
    if previous_port and previous_mgmt_port:
        bag = EtcdDatabag()
        etcdctl = EtcdCtl()
        members = etcdctl.member_list()
        # Iterate over all the members in the list.
        for unit_name in members:
            # Grab the previous peer url and replace the management port.
            peer_urls = members[unit_name]['peer_urls']
            log('Previous peer url: {0}'.format(peer_urls))
            old_port = ':{0}'.format(previous_mgmt_port)
            new_port = ':{0}'.format(configuration.get('management_port'))
            url = peer_urls.replace(old_port, new_port)
            # Update the member's peer_urls with the new ports.
            log(etcdctl.member_update(members[unit_name]['unit_id'], url))
        # Render just the leader's configuration with the new values.
        render('defaults', '/etc/default/etcd', bag.__dict__, owner='root',
               group='root')
        # Close the previous client port and open the new one.
        close_open_ports()
        leader_set({'leader_address':
                    get_connection_string([bag.private_address],
                                          bag.management_port)})
        host.service_restart('etcd')

def migrate_passwords_to_leader_storage(self, excludes=None):
    """Migrate any passwords stored on disk to leader storage."""
    if not is_leader():
        log("Skipping password migration as not the lead unit",
            level=DEBUG)
        return
    dirname = os.path.dirname(self.root_passwd_file_template)
    path = os.path.join(dirname, '*.passwd')
    for f in glob.glob(path):
        if excludes and f in excludes:
            log("Excluding %s from leader storage migration" % (f),
                level=DEBUG)
            continue
        key = os.path.basename(f)
        with open(f, 'r') as passwd:
            _value = passwd.read().strip()
        try:
            leader_set(settings={key: _value})
            if self.delete_ondisk_passwd_file:
                os.unlink(f)
        except ValueError:
            # NOTE cluster relation not yet ready - skip for now
            pass

def initialize_new_leader():
    '''Create an initial cluster string to bring up a single member cluster
    of etcd, and set the leadership data so the followers can join this
    one.'''
    bag = EtcdDatabag()
    bag.token = bag.token
    bag.cluster_state = 'new'
    cluster_connection_string = get_connection_string([bag.private_address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)
    render('defaults', '/etc/default/etcd', bag.__dict__, owner='root',
           group='root')
    host.service_restart('etcd')

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good
    etcdctl = EtcdCtl()
    status = etcdctl.cluster_health()
    if 'unhealthy' in status:
        status_set('blocked', 'Cluster not healthy')
        return
    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([bag.private_address],
                                                     bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')

def update_pools(self):
    # designate-manage communicates with designate via message bus so no
    # need to set OS_ vars
    # NOTE(AJK) this runs with every hook (once most relations are up) and
    # so if it fails it will be picked up by the next relation change or
    # update-status. i.e. it will heal eventually.
    if hookenv.is_leader():
        try:
            cmd = "designate-manage pool update"
            # Note(tinwood) that this command may fail if the pools.yaml
            # doesn't actually contain any pools. This happens when the
            # relation is broken, which errors out the charm. This stops
            # this happening and logs the error.
            subprocess.check_call(cmd.split(), timeout=60)
            # Update leader db to trigger restarts
            hookenv.leader_set(
                {'pool-yaml-hash': host.file_hash(POOLS_YAML)})
        except subprocess.CalledProcessError as e:
            hookenv.log("designate-manage pool update failed: {}"
                        .format(str(e)))
        except subprocess.TimeoutExpired as e:
            # the timeout is if the rabbitmq server has gone away; it just
            # retries continuously; this lets the hook complete.
            hookenv.log("designate-manage pool command timed out: {}"
                        .format(str(e)))

def contrail_ifmap_joined():
    if is_leader():
        creds = leader_get("ifmap-creds")
        creds = json.loads(creds) if creds else {}

        # prune credentials because we can't remove them directly lp #1469731
        creds = {rid: {unit: units[unit]
                       for unit, units in
                       ((unit, creds[rid]) for unit in related_units(rid))
                       if unit in units}
                 for rid in relation_ids("contrail-ifmap")
                 if rid in creds}

        rid = relation_id()
        if rid not in creds:
            creds[rid] = {}
        cs = creds[rid]
        unit = remote_unit()
        if unit in cs:
            return
        # generate new credentials for unit
        cs[unit] = {"username": unit, "password": pwgen(32)}
        leader_set({"ifmap-creds": json.dumps(creds)})
        write_ifmap_config()
        service_restart("supervisor-config")
        relation_set(creds=json.dumps(cs))

def add_metadata():
    # check relation dependencies
    if is_leader() \
       and not leader_get("metadata-provisioned") \
       and config_get("contrail-api-configured") \
       and config_get("neutron-metadata-ready"):
        provision_metadata()
        leader_set({"metadata-provisioned": True})

def create_tenant():
    status_set("maintenance", "Creating tenant")
    render(source="tenant.yaml",
           target="/tmp/tenant.yaml",
           owner="openvim",
           perms=0o664,
           context={})
    cmd = 'openvim tenant-create /tmp/tenant.yaml'
    tenant_uuid = sh_as_openvim(cmd).split()[0]
    tenant_uuid = str(tenant_uuid, 'utf-8')
    leader_set({'tenant': tenant_uuid})
    return tenant_uuid

def make_this_leader():
    host = get_this_hostname()
    port = get_this_port('qdb_port')
    hookenv.leader_set(leader_hostname=host)
    hookenv.leader_set(leader_port=port)
    hookenv.log('leader elected: ' + host + ':' + str(port))

def db_sync(self):
    sync_done = leader_get(attribute='db-sync-done')
    if not sync_done:
        subprocess.check_call(self.sync_cmd)
        leader_set({'db-sync-done': True})
        # Restart services immediately after db sync as
        # render_domain_config needs a working system
        self.restart_all()

def write_and_restart():
    # NOTE(jamespage): seed uuid for use on compute nodes with libvirt
    if not leader_get('secret-uuid') and is_leader():
        leader_set({'secret-uuid': str(uuid.uuid4())})

    # NOTE(jamespage): trigger any configuration related changes
    #                  for cephx permissions restrictions
    ceph_changed()
    CONFIGS.write_all()

def db_sync(self):
    """Perform a database sync using the command defined in the
    self.sync_cmd attribute. The services defined in self.services are
    restarted after the database sync.
    """
    if not self.db_sync_done() and hookenv.is_leader():
        subprocess.check_call(self.sync_cmd)
        hookenv.leader_set({'db-sync-done': True})
        # Restart services immediately after db sync as
        # render_domain_config needs a working system
        self.restart_all()

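# For context, a sketch of how a charm class might wire up the db_sync()
# helper above. The class name, command, and service list are illustrative
# assumptions, not taken from a specific charm.
class ExampleCharm(object):
    services = ['example-api']                   # restarted by restart_all()
    sync_cmd = ['example-manage', 'db', 'sync']  # run once, by the leader

    def db_sync_done(self):
        # The other half of the guard: the leader records completion in
        # leader settings, so every unit can check it.
        return hookenv.leader_get(attribute='db-sync-done')
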
def upgrade_charm():
    download_cplane_installer()
    upgrade_type = get_upgrade_type()
    stop_jboss_service()
    if upgrade_type == 'clean-db':
        if is_leader():
            leader_set({'status': "db_cleaned"})
            clean_create_db()
    cplane_installer()
    if config('intall-reboot-scripts') == 'y':
        install_reboot_scripts()
    start_services(upgrade_type)

def series_upgrade_prepare():
    set_unit_upgrading()
    if not is_unit_paused_set():
        log("Pausing unit for series upgrade.")
        rabbit.pause_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES))
    if is_leader():
        if not leader_get('cluster_series_upgrading'):
            # Inform the entire cluster a series upgrade is occurring.
            # Run the complete-cluster-series-upgrade action on the leader to
            # clear this setting when the full cluster has completed its
            # upgrade.
            leader_set(cluster_series_upgrading=True)

def complete_cluster_series_upgrade(args):
    """Complete the series upgrade process

    After all nodes have been upgraded, this action is run to inform the
    whole cluster the upgrade is done. Config files will be re-rendered
    with each peer in the wsrep_cluster_address config.
    """
    if is_leader():
        # Unset cluster_series_upgrading
        leader_set(cluster_series_upgrading="")
        leader_set(cluster_series_upgrade_leader="")
    config_changed()

def broadcast_client_credentials():
    '''As the leader, copy the client cert and key to the charm dir and set
    the contents as leader data.'''
    charm_dir = os.getenv('CHARM_DIR')
    client_cert(None, charm_dir)
    client_key(None, charm_dir)

    with open('client.crt') as fp:
        client_certificate = fp.read()
    with open('client.key') as fp:
        client_certificate_key = fp.read()

    leader_set({'client_certificate': client_certificate,
                'client_key': client_certificate_key})
    set_state('client-credentials-relayed')

def ceilometer_upgrade(action=False):
    """Execute ceilometer-upgrade command, with retry on failure if gnocchi
    API is not ready for requests"""
    if is_leader() or action:
        if (CompareOpenStackReleases(os_release('ceilometer-common')) >=
                'newton'):
            cmd = ['ceilometer-upgrade']
        else:
            cmd = ['ceilometer-dbsync']
        log("Running ceilometer-upgrade: {}".format(" ".join(cmd)), DEBUG)
        subprocess.check_call(cmd)
        log("ceilometer-upgrade succeeded", DEBUG)
        leader_set(ceilometer_upgrade_run=True)

def leader_elected():
    '''Set the leader node's IP'''
    if is_leader():
        leader_set(**{'leader-ip': get_relation_ip('cluster')})
    else:
        log('leader-elected hook executed, but this unit is not the leader',
            level=INFO)
    # NOTE(tkurek): re-set 'master' relation data
    if relation_ids('master'):
        master_joined()
    # NOTE(tkurek): configure new leader
    if relation_ids('slave'):
        configure_slave()

def set_sync_info(self, sync_time, sync_file):
    """Update leader DB with sync information

    :param sync_time: str Time sync was created in epoch seconds
    :param sync_file: str Local file containing zone information
    :returns: None
    """
    sync_info = {
        LEADERDB_SYNC_SRC_KEY: 'http://{}:80/zone-syncs/{}'.format(
            hookenv.unit_private_ip(), sync_file),
        LEADERDB_SYNC_TIME_KEY: sync_time,
    }
    hookenv.leader_set(sync_info)

def leader_elected():
    if is_leader():
        if not leader_get('heat-domain-admin-passwd'):
            try:
                leader_set({'heat-domain-admin-passwd': pwgen(32)})
            except subprocess.CalledProcessError as e:
                log('leader_set: heat-domain-admin-passwd failed: {}'
                    .format(str(e)), level=WARNING)
        if not leader_get('heat-auth-encryption-key'):
            try:
                leader_set({'heat-auth-encryption-key': pwgen(32)})
            except subprocess.CalledProcessError as e:
                log('leader_set: heat-auth-encryption-key failed: {}'
                    .format(str(e)), level=WARNING)

def check_zone_ids(cls, nova_domain_name, neutron_domain_name):
    # NOTE: used as a context manager; see the `with cls.check_zone_ids(...)`
    # call in create_initial_servers_and_domains above.
    zone_org_ids = {
        'nova-domain-id': cls.get_domain_id(nova_domain_name),
        'neutron-domain-id': cls.get_domain_id(neutron_domain_name),
    }
    yield
    zone_ids = {
        'nova-domain-id': cls.get_domain_id(nova_domain_name),
        'neutron-domain-id': cls.get_domain_id(neutron_domain_name),
    }
    if zone_org_ids != zone_ids:
        # Update leader-db to trigger peers to rerender configs
        # as sink files will need updating with new domain ids
        # Use host ID and current time UUID to help with debugging
        hookenv.leader_set({'domain-init-done': uuid.uuid1()})

def reset_default_password():
    # We need a big timeout here, as the cassandra user actually
    # springs into existence some time after Cassandra has started
    # up and is accepting connections.
    with cassandra.connect('cassandra', 'cassandra', timeout=180) as session:
        # But before we close this security hole, we need to use these
        # credentials to create a different admin account.
        helpers.status_set('maintenance',
                           'Creating initial superuser account')
        username, password = '******', host.pwgen()
        pwhash = cassandra.encrypt_password(password)
        cassandra.ensure_user(session, username, pwhash, superuser=True)
        leadership.leader_set(username=username, password=password)
        helpers.status_set('maintenance',
                           'Changing default admin password')
        cassandra.query(session, 'ALTER USER cassandra WITH PASSWORD %s',
                        ConsistencyLevel.ALL, (host.pwgen(),))
    hookenv.leader_set(default_admin_password_changed=True)

def _save_state(self):
    self.msg('Publishing state')
    if hookenv.is_leader():
        # sort_keys to ensure stability.
        raw = json.dumps(self.grants, sort_keys=True)
        hookenv.leader_set({self.key: raw})

    local_unit = hookenv.local_unit()
    if self.relid is None:
        # No peer relation yet. Fallback to local state.
        self.msg('No peer relation. Saving local state')
        self._save_local_state(self.requests[local_unit])
    else:
        # sort_keys to ensure stability.
        raw = json.dumps(self.requests[local_unit], sort_keys=True)
        hookenv.relation_set(self.relid,
                             relation_settings={self.key: raw})

def config_changed():
    if config('prefer-ipv6'):
        assert_charm_supports_ipv6()

    # Check if an upgrade was requested
    check_for_upgrade()

    log('Monitor hosts are ' + repr(get_mon_hosts()))

    sysctl_dict = config('sysctl')
    if sysctl_dict:
        create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-charm.conf')
    if relations_of_type('nrpe-external-master'):
        update_nrpe_config()

    if is_leader():
        if not leader_get('fsid') or not leader_get('monitor-secret'):
            if config('fsid'):
                fsid = config('fsid')
            else:
                fsid = "{}".format(uuid.uuid1())
            if config('monitor-secret'):
                mon_secret = config('monitor-secret')
            else:
                mon_secret = "{}".format(ceph.generate_monitor_secret())
            status_set('maintenance', 'Creating FSID and Monitor Secret')
            opts = {
                'fsid': fsid,
                'monitor-secret': mon_secret,
            }
            log("Settings for the cluster are: {}".format(opts))
            leader_set(opts)
    else:
        if leader_get('fsid') is None or leader_get('monitor-secret') is None:
            log('still waiting for leader to setup keys')
            status_set('waiting', 'Waiting for leader to setup keys')
            sys.exit(0)

    emit_cephconf()

    # Support use of single node ceph
    if not ceph.is_bootstrapped() and int(config('monitor-count')) == 1:
        status_set('maintenance', 'Bootstrapping single Ceph MON')
        ceph.bootstrap_monitor_cluster(config('monitor-secret'))
        ceph.wait_for_bootstrap()
    install_apparmor_profile()

def upload_signed_csr(*args):
    if not hookenv.is_leader():
        hookenv.action_fail('Please run action on lead unit')
        return

    action_config = hookenv.action_get()
    root_ca = action_config.get('root-ca')
    if root_ca:
        hookenv.leader_set(
            {'root-ca': base64.b64decode(root_ca).decode("utf-8")})
    vault_pki.upload_signed_csr(
        base64.b64decode(action_config['pem']).decode("utf-8"),
        allowed_domains=action_config.get('allowed-domains'),
        allow_subdomains=action_config.get('allow-subdomains'),
        enforce_hostnames=action_config.get('enforce-hostnames'),
        allow_any_name=action_config.get('allow-any-name'),
        max_ttl=action_config.get('max-ttl'))
    set_flag('charm.vault.ca.ready')
    set_flag('pki.backend.tuned')
    # reissue any certificates we might previously have provided
    set_flag('certificates.reissue.requested')
    set_flag('certificates.reissue.global.requested')

def generate_root_ca(*args):
    if not hookenv.is_leader():
        hookenv.action_fail('Please run action on lead unit')
        return

    action_config = hookenv.action_get()
    root_ca = vault_pki.generate_root_ca(
        ttl=action_config['ttl'],
        allow_any_name=action_config['allow-any-name'],
        allowed_domains=action_config['allowed-domains'],
        allow_bare_domains=action_config['allow-bare-domains'],
        allow_subdomains=action_config['allow-subdomains'],
        allow_glob_domains=action_config['allow-glob-domains'],
        enforce_hostnames=action_config['enforce-hostnames'],
        max_ttl=action_config['max-ttl'])
    hookenv.leader_set({'root-ca': root_ca})
    hookenv.action_set({'output': root_ca})
    set_flag('charm.vault.ca.ready')
    set_flag('pki.backend.tuned')
    # reissue any certificates we might previously have provided
    set_flag('certificates.reissue.requested')
    set_flag('certificates.reissue.global.requested')

def cluster_departed():
    if is_leader():
        unit = remote_unit()
        for var_name in ["ip", "data_ip"]:
            ips = common_utils.json_loads(
                leader_get("controller_{}s".format(var_name)), dict())
            if unit not in ips:
                return
            old_ip = ips.pop(unit)
            ip_list = common_utils.json_loads(
                leader_get("controller_{}_list".format(var_name)), list())
            ip_list.remove(old_ip)
            log("{}_LIST: {} {}S: {}".format(
                var_name.upper(), str(ip_list), var_name.upper(), str(ips)))
            settings = {
                "controller_{}_list".format(var_name): json.dumps(ip_list),
                "controller_{}s".format(var_name): json.dumps(ips),
            }
            leader_set(settings=settings)

        update_northbound_relations()
        update_southbound_relations()
        update_issu_relations()
        utils.update_charm_status()

def process_snapd_timer():
    """Set the snapd refresh timer on the leader so all cluster members
    (present and future) will refresh near the same time."""
    # Get the current snapd refresh timer; we know layer-snap has set this
    # when the 'snap.refresh.set' flag is present.
    timer = snap.get(snapname="core",
                     key="refresh.timer").decode("utf-8").strip()
    if not timer:
        # The core snap timer is empty. This likely means a subordinate timer
        # reset ours. Try to set it back to a previously leader-set value,
        # falling back to config if needed. Luckily, this should only happen
        # during subordinate install, so this should remain stable afterward.
        timer = leader_get("snapd_refresh") or hookenv.config("snapd_refresh")
        snap.set_refresh_timer(timer)

    # Ensure we have the timer known by snapd (it may differ from config).
    timer = snap.get(snapname="core",
                     key="refresh.timer").decode("utf-8").strip()

    # The first time through, data_changed will be true. Subsequent calls
    # should only update leader data if something changed.
    if data_changed("etcd_snapd_refresh", timer):
        log("setting snapd_refresh timer to: {}".format(timer))
        leader_set({"snapd_refresh": timer})

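# For reference, a hedged sketch of the data_changed() guard used above: in
# charms.reactive.helpers it hashes the value into the unit's local kv store
# and returns True when the hash differs from the previously recorded one.
# The key name below is illustrative.
from charms.reactive.helpers import data_changed


def _only_on_change(value):
    # True on the first call, and any time `value` differs from the cached
    # hash; recording the new hash is a side effect of the call.
    return data_changed('example.key', value)
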
def prepare():
    # Use the pause feature to stop mysql during the duration of the upgrade
    pause_unit_helper(register_configs())
    # Set this unit to series upgrading
    set_unit_upgrading()
    # The leader will "bootstrap" with no wsrep peers
    # Non-leaders will point only at the newly upgraded leader until the
    # cluster series upgrade is completed.
    # Set cluster_series_upgrading for the duration of the cluster series
    # upgrade. This will be unset with the action
    # complete-cluster-series-upgrade on the leader node.
    hosts = []
    if not leader_get('cluster_series_upgrade_leader'):
        leader_set(cluster_series_upgrading=True)
        leader_set(
            cluster_series_upgrade_leader=get_relation_ip('cluster'))
    else:
        hosts = [leader_get('cluster_series_upgrade_leader')]

    # Render config
    render_config(hosts)

def master_departed(interface='master'):
    if is_leader():
        reset_password = True
        new_slave_addresses = []
        old_slave_addresses = list_replication_users()
        for rid in relation_ids(interface):
            if related_units(rid):
                reset_password = False
            for unit in related_units(rid):
                if not relation_get(attribute='slave_address',
                                    rid=rid, unit=unit):
                    log("No relation data for {}".format(unit), level=DEBUG)
                    return
                new_slave_addresses.append(
                    relation_get(attribute='slave_address',
                                 rid=rid,
                                 unit=unit))
        for old_slave_address in old_slave_addresses:
            if old_slave_address not in new_slave_addresses:
                delete_replication_user(old_slave_address)
        if reset_password:
            leader_set({'async-rep-password': ''})

def initialize_new_leader():
    '''Create an initial cluster string to bring up a single member cluster
    of etcd, and set the leadership data so the followers can join this
    one.'''
    bag = EtcdDatabag()
    bag.token = bag.token
    bag.cluster_state = 'new'
    address = get_ingress_address('cluster')
    cluster_connection_string = get_connection_string([address],
                                                      bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, cluster_connection_string)
    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good
    etcdctl = EtcdCtl()
    status = etcdctl.cluster_health()
    if 'unhealthy' in status:
        status_set('blocked', 'Cluster not healthy.')
        return
    # We have a healthy leader, broadcast initial data-points for followers
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set({'token': bag.token,
                'leader_address': leader_connection_string,
                'cluster': bag.cluster})

    # set registered state since if we ever become a follower, we will not
    # need to re-register
    set_state('etcd.registered')

    # finish bootstrap delta and set configured state
    set_state('etcd.leader.configured')

def create_trust(self, identity_service, cloud_admin_password):
    """Create trust between Trilio WLM service user and Cloud Admin"""
    if not hookenv.is_leader():
        raise charms_openstack.plugins.classes.UnitNotLeaderException(
            "please run on leader unit")
    if not identity_service.base_data_complete():
        raise IdentityServiceIncompleteException(
            "identity-service relation incomplete")
    # NOTE(jamespage): hardcode of admin username here may be brittle
    subprocess.check_call([
        "workloadmgr",
        "--os-username", "admin",
        "--os-password", cloud_admin_password,
        "--os-auth-url",
        "{}://{}:{}/v3".format(
            identity_service.service_protocol(),
            identity_service.service_host(),
            identity_service.service_port(),
        ),
        "--os-user-domain-name", "admin_domain",
        "--os-project-domain-id", identity_service.admin_domain_id(),
        "--os-project-id", identity_service.admin_project_id(),
        "--os-project-name", "admin",
        "--os-region-name", hookenv.config("region"),
        "trust-create",
        "--is_cloud_trust", "True",
        "Admin",
    ])
    hookenv.leader_set({"trusted": True})

def neutron_plugin_api_subordinate_relation_joined(relid=None):
    relation_data = {}
    if is_db_initialised():
        db_migration_key = 'migrate-database-nonce'
        if not relid:
            relid = relation_id()
        leader_key = '{}-{}'.format(db_migration_key, relid)
        for unit in related_units(relid):
            nonce = relation_get(db_migration_key, rid=relid, unit=unit)
            if nonce:
                if is_leader() and leader_get(leader_key) != nonce:
                    migrate_neutron_database(upgrade=True)
                    # track nonce in leader storage to avoid superfluous
                    # migrations
                    leader_set({leader_key: nonce})
                # set nonce back on relation to signal completion to other
                # end. we do this regardless of leadership status so that
                # subordinates connected to non-leader units can proceed.
                relation_data[db_migration_key] = nonce

    relation_data['neutron-api-ready'] = 'no'
    if is_api_ready(CONFIGS):
        relation_data['neutron-api-ready'] = 'yes'
    if not manage_plugin():
        neutron_cc_ctxt = NeutronCCContext()()
        plugin_instance = NeutronApiSDNContext()
        neutron_config_data = {
            k: v for k, v in neutron_cc_ctxt.items()
            if plugin_instance.is_allowed(k)}
        if neutron_config_data:
            relation_data['neutron_config_data'] = json.dumps(
                neutron_config_data)
    relation_set(relation_id=relid, **relation_data)

    # there is no race condition with the neutron service restart
    # as juju propagates the changes done in relation_set only after
    # the hook exits
    CONFIGS.write_all()

def upgrade_charm():
    install()
    packages_removed = remove_old_packages()
    if packages_removed and not is_unit_paused_set():
        log("Package purge detected, restarting services", "INFO")
        for s in services():
            service_restart(s)
    update_nrpe_config()
    any_changed()
    for rid in relation_ids('cluster'):
        cluster_joined(relation_id=rid)
    # NOTE: (thedac) Currently there is no method to independently check if
    # ceilometer-upgrade has been run short of manual DB queries.
    # On upgrade-charm the leader node must assume it has already been run
    # and assert so with leader-set. If this is not done, then the upgrade
    # from the previous version of the charm will leave ceilometer in a
    # blocked state.
    if is_leader() and relation_ids("metric-service"):
        if not leader_get("ceilometer_upgrade_run"):
            log("Assuming ceilometer-upgrade has been run. If this is not "
                "the case, please run the ceilometer-upgrade action on the "
                "leader node.", level=WARNING)
            leader_set(ceilometer_upgrade_run=True)

def _address_changed(unit, ip, var_name):
    ip_list = common_utils.json_loads(
        leader_get("controller_{}_list".format(var_name)), list())
    ips = common_utils.json_loads(
        leader_get("controller_{}s".format(var_name)), dict())
    if ip in ip_list:
        return
    old_ip = ips.get(unit)
    if old_ip:
        index = ip_list.index(old_ip)
        ip_list[index] = ip
        ips[unit] = ip
    else:
        ip_list.append(ip)
        ips[unit] = ip

    log("{}_LIST: {} {}S: {}".format(
        var_name.upper(), str(ip_list), var_name.upper(), str(ips)))
    settings = {
        "controller_{}_list".format(var_name): json.dumps(ip_list),
        "controller_{}s".format(var_name): json.dumps(ips),
    }
    leader_set(settings=settings)

def check_ca_status(force=False):
    '''Called when the configuration values have changed.'''
    config = hookenv.config()
    if config.changed('root_certificate') or force:
        remove_state('certificate authority available')
        if is_leader():
            root_cert = config.get('root_certificate')
            if root_cert:
                decoded_cert = _decode(root_cert)
            else:
                decoded_cert = None

            hookenv.log('Leader is creating the certificate authority.')
            certificate_authority = create_certificate_authority(decoded_cert)
            leader_set({'certificate_authority': certificate_authority})
            install_ca(certificate_authority)
            # The leader can create the server certificate based on CA.
            hookenv.log('Leader is creating the server certificate.')
            # Remove the path characters from the unit name tls/0 -> tls_0.
            path_safe_name = hookenv.local_unit().replace('/', '_')
            create_server_certificate(path_safe_name)
            # The leader can create a client certificate one time.
            hookenv.log('Leader is creating the client certificate.')
            create_client_certificate()

def db_sync(self):
    """Override db_sync to catch exceptions for the s3 backend.

    Perform a database sync using the command defined in the self.sync_cmd
    attribute. The services defined in self.services are restarted after
    the database sync.
    """
    if not self.db_sync_done() and hookenv.is_leader():
        try:
            with open("/var/log/gnocchi/gnocchi-upgrade.log", "w+") as f:
                subprocess.check_call(self.sync_cmd, stdout=f,
                                      stderr=subprocess.STDOUT)
            hookenv.leader_set({'db-sync-done': True})
            # Restart services immediately after db sync as
            # render_domain_config needs a working system
            self.restart_all()
        except subprocess.CalledProcessError as e:
            hookenv.status_set(
                'blocked', 'An error occurred while '
                'running gnocchi-upgrade. Logs available '
                'in /var/log/gnocchi/gnocchi-upgrade.log')
            hookenv.log(e, hookenv.DEBUG)
            raise e

def create_license(self, identity_service):
    if not hookenv.is_leader():
        raise charms_openstack.plugins.classes.UnitNotLeaderException(
            "please run on leader unit")

    license_file = hookenv.resource_get("license")
    if not license_file:
        raise LicenseFileMissingException(
            "License file not provided as a resource")

    if not identity_service.base_data_complete():
        raise IdentityServiceIncompleteException(
            "identity-service relation incomplete")

    subprocess.check_call([
        "workloadmgr",
        "--os-username", identity_service.service_username(),
        "--os-password", identity_service.service_password(),
        "--os-auth-url",
        "{}://{}:{}/v3".format(
            identity_service.service_protocol(),
            identity_service.service_host(),
            identity_service.service_port(),
        ),
        "--os-user-domain-name", "service_domain",
        "--os-project-domain-id", identity_service.service_domain_id(),
        "--os-project-id", identity_service.service_tenant_id(),
        "--os-project-name", identity_service.service_tenant(),
        "--os-region-name", hookenv.config("region"),
        "license-create",
        license_file,
    ])
    hookenv.leader_set({"licensed": True})

def upgrade():
    if is_leader():
        if is_unit_paused_set():
            log('Unit is paused, skipping upgrade', level=INFO)
            return

        # Leader sets on upgrade
        leader_set(**{'leader-ip': get_relation_ip('cluster')})
        configure_sstuser(sst_password())
        if not leader_get('root-password') and leader_get('mysql.passwd'):
            leader_set(**{'root-password': leader_get('mysql.passwd')})

        # On upgrade-charm we assume the cluster was complete at some point
        kvstore = kv()
        initial_clustered = kvstore.get(INITIAL_CLUSTERED_KEY, False)
        if not initial_clustered:
            kvstore.set(key=INITIAL_CLUSTERED_KEY, value=True)
            kvstore.flush()

        # broadcast the bootstrap-uuid
        wsrep_ready = get_wsrep_value('wsrep_ready') or ""
        if wsrep_ready.lower() in ['on', 'ready']:
            cluster_state_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
            if cluster_state_uuid:
                mark_seeded()
                notify_bootstrapped(cluster_uuid=cluster_state_uuid)
    else:
        # Ensure all the peers have the bootstrap-uuid attribute set.
        # As this is all happening during the upgrade-charm hook, it is
        # reasonable to expect the cluster is running.
        # Wait until the leader has set the bootstrap-uuid.
        try:
            update_bootstrap_uuid()
        except LeaderNoBootstrapUUIDError:
            status_set('waiting', "Waiting for bootstrap-uuid set by leader")

def notify_bootstrapped(cluster_rid=None, cluster_uuid=None):
    if cluster_rid:
        rids = [cluster_rid]
    else:
        rids = relation_ids('cluster')
    if not rids:
        log("No relation ids found for 'cluster'", level=INFO)
        return

    if not cluster_uuid:
        cluster_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
        if not cluster_uuid:
            cluster_uuid = str(uuid.uuid4())
            log("Could not determine cluster uuid so using '%s' instead" %
                (cluster_uuid), INFO)

    log("Notifying peers that percona is bootstrapped (uuid=%s)" %
        (cluster_uuid), DEBUG)
    for rid in rids:
        relation_set(relation_id=rid, **{'bootstrap-uuid': cluster_uuid})
    if is_leader():
        leader_set(**{'bootstrap-uuid': cluster_uuid})

def update_service_ips():
    try:
        endpoints = _get_endpoints()
    except Exception as e:
        log("Couldn't detect service ips: {exc}".format(exc=e),
            level=WARNING)
        return False

    values = dict()

    def _check_key(key):
        val = endpoints.get(key)
        if val != leader_get(key):
            values[key] = val

    _check_key("compute_service_ip")
    _check_key("image_service_ip")
    _check_key("network_service_ip")
    if values:
        log("Service ips have changed: {ips}".format(ips=values))
        leader_set(**values)
        return True

    log("Service ips have not changed.")
    return False

def leader_set(*args, **kw):
    '''Change leadership settings, per charmhelpers.core.hookenv.leader_set.

    Settings may either be passed in as a single dictionary, or using
    keyword arguments. All values must be strings.

    The leadership.set.{key} reactive state will be set while the
    leadership hook environment setting remains set.

    Changed leadership settings will set the leadership.changed.{key}
    and leadership.changed states. These states will remain set until
    the following hook.

    These state changes take effect immediately on the leader, and
    in future hooks run on non-leaders. In this way both leaders and
    non-leaders can share handlers, waiting on these states.
    '''
    if args:
        if len(args) > 1:
            raise TypeError('leader_set() takes 1 positional argument but '
                            '{} were given'.format(len(args)))
        else:
            settings = dict(args[0])
    else:
        settings = {}
    settings.update(kw)
    previous = unitdata.kv().getrange('leadership.settings.', strip=True)

    for key, value in settings.items():
        if value != previous.get(key):
            reactive.set_state('leadership.changed.{}'.format(key))
            reactive.set_state('leadership.changed')
        reactive.helpers.toggle_state('leadership.set.{}'.format(key),
                                      value is not None)

    hookenv.leader_set(settings)
    unitdata.kv().update(settings, prefix='leadership.settings.')

def relation_set(relation_id=None, relation_settings=None, **kwargs):
    """Attempt to use leader-set if supported in the current version of Juju,
    otherwise falls back on relation-set.

    Note that we only attempt to use leader-set if the provided relation_id
    is a peer relation id or no relation id is provided (in which case we
    assume we are within the peer relation context).
    """
    try:
        if relation_id in relation_ids('cluster'):
            return leader_set(settings=relation_settings, **kwargs)
        else:
            raise NotImplementedError
    except NotImplementedError:
        return _relation_set(relation_id=relation_id,
                             relation_settings=relation_settings, **kwargs)

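# A hedged usage sketch for the wrapper above (key and value illustrative):
# data set against the peer 'cluster' relation is promoted to leader
# settings, while data for any other relation id falls through to the
# underlying relation-set.
def _broadcast_example():
    for peer_rid in relation_ids('cluster'):
        relation_set(relation_id=peer_rid,
                     relation_settings={'example-nonce': 'abc123'})
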
def check_local_metadata():
    if not is_leader():
        return

    if not config.get("vrouter-provisioned"):
        if leader_get("local-metadata-provisioned"):
            # impossible to know if current hook is firing because
            # relation or leader is being removed lp #1469731
            if not relation_ids("cluster"):
                unprovision_local_metadata()
            leader_set({"local-metadata-provisioned": ""})
        return

    if config["local-metadata-server"]:
        if not leader_get("local-metadata-provisioned"):
            provision_local_metadata()
            leader_set({"local-metadata-provisioned": True})
    elif leader_get("local-metadata-provisioned"):
        unprovision_local_metadata()
        leader_set({"local-metadata-provisioned": ""})

def leader_elected():
    if not leader_get("db_user"):
        user = "******"
        password = uuid.uuid4().hex
        leader_set(db_user=user, db_password=password)

    if not leader_get("rabbitmq_user"):
        user = "******"
        password = uuid.uuid4().hex
        vhost = "contrail"
        leader_set(rabbitmq_user=user,
                   rabbitmq_password=password,
                   rabbitmq_vhost=vhost)
        update_northbound_relations()

    ip_list = leader_get("controller_ip_list")
    ips = get_controller_ips()
    if not ip_list:
        ip_list = list(ips.values())
        log("IP_LIST: {} IPS: {}".format(str(ip_list), str(ips)))
        leader_set(controller_ip_list=json.dumps(ip_list),
                   controller_ips=json.dumps(ips))
        # TODO: pass this list to all south/north relations
    else:
        current_ip_list = list(ips.values())
        dead_ips = set(ip_list).difference(current_ip_list)
        new_ips = set(current_ip_list).difference(ip_list)
        if new_ips:
            log("There are new controllers that are not in the list: " +
                str(new_ips), level=ERROR)
        if dead_ips:
            log("There are dead controllers that are still in the list: " +
                str(dead_ips), level=ERROR)

    update_charm_status()

def bootstrap_source_relation_changed():
    """Handles relation data changes on the bootstrap-source relation.

    The bootstrap-source relation is used to share remote bootstrap
    information with the ceph-mon charm. It exchanges the remote
    ceph-public-addresses which are used for the mons, the fsid, and the
    monitor-secret.
    """
    if not config('no-bootstrap'):
        status_set('blocked',
                   'Cannot join the bootstrap-source relation when '
                   'no-bootstrap is False')
        return

    if not is_leader():
        log('Deferring leader-setting updates to the leader unit')
        return

    curr_fsid = leader_get('fsid')
    curr_secret = leader_get('monitor-secret')
    for relid in relation_ids('bootstrap-source'):
        for unit in related_units(relid=relid):
            mon_secret = relation_get('monitor-secret', unit, relid)
            fsid = relation_get('fsid', unit, relid)
            if not (mon_secret and fsid):
                log('Relation data is not ready as the fsid or the '
                    'monitor-secret are missing from the relation: '
                    'mon_secret = {} and fsid = {} '.format(
                        mon_secret, fsid))
                continue
            if not (curr_fsid or curr_secret):
                curr_fsid = fsid
                curr_secret = mon_secret
            else:
                # The fsids and secrets need to match or the local monitors
                # will fail to join the mon cluster. If they don't, bail
                # because something needs to be investigated.
                assert curr_fsid == fsid, \
                    "bootstrap fsid '{}' != current fsid '{}'".format(
                        fsid, curr_fsid)
                assert curr_secret == mon_secret, \
                    "bootstrap secret '{}' != current secret '{}'".format(
                        mon_secret, curr_secret)

            opts = {
                'fsid': fsid,
                'monitor-secret': mon_secret,
            }
            try:
                leader_set(opts)
                log('Updating leader settings for fsid and monitor-secret '
                    'from remote relation data: {}'.format(opts))
            except Exception as e:
                # we're probably no longer the leader; an exception occurred,
                # so let's log it anyway.
                log("leader_set failed: {}".format(str(e)))

    # The leader unit needs to bootstrap itself as it won't receive the
    # leader-settings-changed hook elsewhere.
    if curr_fsid:
        mon_relation()

def config_changed():
    # Get the cfg object so we can see if the no-bootstrap value has changed
    # and triggered this hook invocation
    cfg = config()
    if config('prefer-ipv6'):
        assert_charm_supports_ipv6()

    check_for_upgrade()

    log('Monitor hosts are ' + repr(get_mon_hosts()))

    sysctl_dict = config('sysctl')
    if sysctl_dict:
        create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-charm.conf')
    if relations_of_type('nrpe-external-master'):
        update_nrpe_config()

    if is_leader():
        if not config('no-bootstrap'):
            if not leader_get('fsid') or not leader_get('monitor-secret'):
                fsid = "{}".format(uuid.uuid1())
                if config('monitor-secret'):
                    mon_secret = config('monitor-secret')
                else:
                    mon_secret = "{}".format(ceph.generate_monitor_secret())
                opts = {
                    'fsid': fsid,
                    'monitor-secret': mon_secret,
                }
                try:
                    leader_set(opts)
                    status_set('maintenance',
                               'Created FSID and Monitor Secret')
                    log("Settings for the cluster are: {}".format(opts))
                except Exception as e:
                    # we're probably no longer the leader; an exception
                    # occurred, so let's log it anyway.
                    log("leader_set failed: {}".format(str(e)))
        elif (cfg.changed('no-bootstrap') and
              is_relation_made('bootstrap-source')):
            # User changed the no-bootstrap config option, we're the leader,
            # and the bootstrap-source relation has been made. The charm
            # should be in a blocked state indicating that the no-bootstrap
            # option must be set. This block is invoked when the user is
            # trying to get out of that scenario by enabling no-bootstrap.
            bootstrap_source_relation_changed()

    # unconditionally verify that the fsid and monitor-secret are set now
    # otherwise we exit until a leader does this.
    if leader_get('fsid') is None or leader_get('monitor-secret') is None:
        log('still waiting for leader to setup keys')
        status_set('waiting', 'Waiting for leader to setup keys')
        return

    emit_cephconf()

    # Support use of single node ceph
    if (not ceph.is_bootstrapped() and int(config('monitor-count')) == 1 and
            is_leader()):
        status_set('maintenance', 'Bootstrapping single Ceph MON')
        # the following call raises an exception if it can't add the keyring
        try:
            ceph.bootstrap_monitor_cluster(leader_get('monitor-secret'))
        except FileNotFoundError as e:  # NOQA -- PEP8 is still PY2
            log("Couldn't bootstrap the monitor yet: {}".format(str(e)))
            return
        ceph.wait_for_bootstrap()
        if cmp_pkgrevno('ceph', '12.0.0') >= 0:
            status_set('maintenance', 'Bootstrapping single Ceph MGR')
            ceph.bootstrap_manager()

    # Update client relations
    notify_client()

def master_relation_joined(relation_id=None):
    if not ready_for_service(legacy=False):
        log('unit not ready, deferring multisite configuration')
        return

    internal_url = '{}:{}'.format(
        canonical_url(CONFIGS, INTERNAL),
        listen_port(),
    )
    endpoints = [internal_url]
    realm = config('realm')
    zonegroup = config('zonegroup')
    zone = config('zone')
    access_key = leader_get('access_key')
    secret = leader_get('secret')

    if not all((realm, zonegroup, zone)):
        return

    relation_set(relation_id=relation_id,
                 realm=realm,
                 zonegroup=zonegroup,
                 url=endpoints[0],
                 access_key=access_key,
                 secret=secret)

    if not is_leader():
        return

    if not leader_get('restart_nonce'):
        # NOTE(jamespage):
        # This is an ugly kludge to force creation of the required data
        # items in the .rgw.root pool prior to the radosgw process being
        # started; radosgw-admin does not currently have a way of doing
        # this operation but a period update will force it to be created.
        multisite.update_period(fatal=False)

    mutation = False

    if realm not in multisite.list_realms():
        multisite.create_realm(realm, default=True)
        mutation = True

    if zonegroup not in multisite.list_zonegroups():
        multisite.create_zonegroup(zonegroup,
                                   endpoints=endpoints,
                                   default=True,
                                   master=True,
                                   realm=realm)
        mutation = True

    if zone not in multisite.list_zones():
        multisite.create_zone(zone,
                              endpoints=endpoints,
                              default=True,
                              master=True,
                              zonegroup=zonegroup)
        mutation = True

    if MULTISITE_SYSTEM_USER not in multisite.list_users():
        access_key, secret = multisite.create_system_user(
            MULTISITE_SYSTEM_USER)
        multisite.modify_zone(zone,
                              access_key=access_key,
                              secret=secret)
        leader_set(access_key=access_key,
                   secret=secret)
        mutation = True

    if mutation:
        multisite.update_period()
        service_restart(service_name())
        leader_set(restart_nonce=str(uuid.uuid4()))

    relation_set(relation_id=relation_id,
                 access_key=access_key,
                 secret=secret)

def slave_relation_changed(relation_id=None, unit=None):
    if not is_leader():
        return
    if not ready_for_service(legacy=False):
        log('unit not ready, deferring multisite configuration')
        return

    master_data = relation_get(rid=relation_id, unit=unit)
    if not all((master_data.get('realm'),
                master_data.get('zonegroup'),
                master_data.get('access_key'),
                master_data.get('secret'),
                master_data.get('url'))):
        log("Defer processing until master RGW has provided required data")
        return

    internal_url = '{}:{}'.format(
        canonical_url(CONFIGS, INTERNAL),
        listen_port(),
    )
    endpoints = [internal_url]
    realm = config('realm')
    zonegroup = config('zonegroup')
    zone = config('zone')

    if (realm, zonegroup) != (master_data['realm'],
                              master_data['zonegroup']):
        log("Mismatched configuration so stop multi-site configuration now")
        return

    if not leader_get('restart_nonce'):
        # NOTE(jamespage):
        # This is an ugly kludge to force creation of the required data
        # items in the .rgw.root pool prior to the radosgw process being
        # started; radosgw-admin does not currently have a way of doing
        # this operation but a period update will force it to be created.
        multisite.update_period(fatal=False)

    mutation = False

    if realm not in multisite.list_realms():
        multisite.pull_realm(url=master_data['url'],
                             access_key=master_data['access_key'],
                             secret=master_data['secret'])
        multisite.pull_period(url=master_data['url'],
                              access_key=master_data['access_key'],
                              secret=master_data['secret'])
        multisite.set_default_realm(realm)
        mutation = True

    if zone not in multisite.list_zones():
        multisite.create_zone(zone,
                              endpoints=endpoints,
                              default=False,
                              master=False,
                              zonegroup=zonegroup,
                              access_key=master_data['access_key'],
                              secret=master_data['secret'])
        mutation = True

    if mutation:
        multisite.update_period()
        service_restart(service_name())
        leader_set(restart_nonce=str(uuid.uuid4()))

def upgrade_charm():
    if is_leader() and leader_get('namespace_tenants') != 'True':
        leader_set(namespace_tenants=False)

def db_departed_or_broken():
    if is_leader():
        leader_set({'db-initialised': None})

def set_mysql_password(self, username, password):
    """Update a mysql password for the provided username changing the
    leader settings

    To update root's password pass `None` in the username
    """
    if username is None:
        username = 'root'

    # get root password via leader-get, it may be that in the past (when
    # changes to root-password were not supported) the user changed the
    # password, so leader-get is more reliable source than
    # config.previous('root-password').
    rel_username = None if username == 'root' else username
    cur_passwd = self.get_mysql_password(rel_username)

    # password that needs to be set
    new_passwd = password

    # update password for all users (e.g. root@localhost, root@::1, etc)
    try:
        self.connect(user=username, password=cur_passwd)
        cursor = self.connection.cursor()
    except MySQLdb.OperationalError as ex:
        raise MySQLSetPasswordError(('Cannot connect using password in '
                                     'leader settings (%s)') % ex, ex)

    try:
        # NOTE(freyes): Due to skip-name-resolve root@$HOSTNAME account
        # fails when using SET PASSWORD so using UPDATE against the
        # mysql.user table is needed, but changes to this table are not
        # replicated across the cluster, so this update needs to run in
        # all the nodes. More info at
        # http://galeracluster.com/documentation-webpages/userchanges.html
        release = CompareHostReleases(lsb_release()['DISTRIB_CODENAME'])
        if release < 'bionic':
            SQL_UPDATE_PASSWD = ("UPDATE mysql.user SET password = "
                                 "PASSWORD( %s ) WHERE user = %s;")
        else:
            # PXC 5.7 (introduced in Bionic) uses authentication_string
            SQL_UPDATE_PASSWD = ("UPDATE mysql.user SET "
                                 "authentication_string = "
                                 "PASSWORD( %s ) WHERE user = %s;")
        cursor.execute(SQL_UPDATE_PASSWD, (new_passwd, username))
        cursor.execute('FLUSH PRIVILEGES;')
        self.connection.commit()
    except MySQLdb.OperationalError as ex:
        raise MySQLSetPasswordError('Cannot update password: %s' % str(ex),
                                    ex)
    finally:
        cursor.close()

    # check the password was changed
    try:
        self.connect(user=username, password=new_passwd)
        self.execute('select 1;')
    except MySQLdb.OperationalError as ex:
        raise MySQLSetPasswordError(('Cannot connect using new password: '
                                     '%s') % str(ex), ex)

    if not is_leader():
        log('Only the leader can set a new password in the relation',
            level=DEBUG)
        return

    for key in self.passwd_keys(rel_username):
        _password = leader_get(key)
        if _password:
            log('Updating password for %s (%s)' % (key, rel_username),
                level=DEBUG)
            leader_set(settings={key: new_passwd})

def db_sync(self):
    if not self.db_sync_done() and hookenv.is_leader():
        subprocess.check_call(self.sync_cmd)
        hookenv.leader_set({'db-sync-done': True})
        self.restart_all()

def contrail_kubernetes_config_changed(rel_id=None):
    if not is_leader():
        return
    leader_set(
        settings={"kubernetes_workers": json.dumps(_collect_worker_ips())})
    _notify_controller()