def service_stop(self, service_name):
    """Stop ``service_name``, via Pacemaker tooling when it is clustered.

    A service with no entry in ``self.service_to_resource_map``, or a unit
    that has no peers, is stopped by the parent class implementation.
    Otherwise ``crm_resource --force-stop`` is run for the mapped resource.

    :param service_name: name of the service to stop.
    :raises RuntimeError: if ``crm_resource`` fails for any reason other
        than reporting the resource as "not found".
    """
    resource = self.service_to_resource_map.get(service_name)
    if not (resource and peer_units()):
        super().service_stop(service_name)
        return

    # Force-stop the resource; per the original note, forced operations
    # act on the local node.
    stop_cmd = [
        'crm_resource', '--wait', '--resource', resource, '--force-stop',
    ]
    try:
        subprocess.run(
            stop_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode != self._crm_no_such_resource_code():
            # Unexpected exit code: report it without the stderr text.
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e

        stderr_text = e.stderr.decode('utf-8')
        if 'not found' not in stderr_text:
            raise RuntimeError(
                CRM_ERR_MSG.format(e.returncode, stderr_text)) from e

        # Fall back to stopping the service directly since:
        # 1. It could be that the resource hasn't been defined yet;
        # 2. This is a single-unit deployment without hacluster.
        super().service_stop(service_name)
def initialise_pki():
    """Create certs and keys required for token signing.

    Used for PKI and signing token revocation list.

    The PKI cert paths are ensured first. ``keystone-manage pki_setup`` is
    only run when this unit has no peers or is the SSL cert master; the
    keystone log directory and log file permissions are then reset. PKI
    directory permissions are ensured at the end.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    ensure_pki_cert_paths()
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = [
            'keystone-manage', 'pki_setup',
            '--keystone-user', 'keystone',
            '--keystone-group', 'keystone',
        ]
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root. The owner must be the same 'keystone' account passed to
        # pki_setup above; the previous masked value ('******') is not a
        # valid account name.
        ensure_permissions('/var/log/keystone', user='keystone',
                           group='keystone', perms=0o744)
        ensure_permissions('/var/log/keystone/keystone.log', user='keystone',
                           group='keystone', perms=0o644)

    ensure_pki_dir_permissions()
def initialise_pki():
    """Create the certs and keys used for token signing.

    Used for PKI and signing token revocation list. Nothing is done when
    the 'keystone-common' release is 'pike' or later.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    release = CompareOpenStackReleases(os_release('keystone-common'))
    if release >= 'pike':
        # pike dropped support for PKI token; skip function
        return

    ensure_pki_cert_paths()

    runs_pki_setup = not peer_units() or is_ssl_cert_master()
    if runs_pki_setup:
        log("Ensuring PKI token certs created", level=DEBUG)

        # Snap installs use a different binary and log location.
        if snap_install_requested():
            keystone_manage = '/snap/bin/keystone-manage'
            log_dir = '/var/snap/keystone/common/log'
        else:
            keystone_manage = 'keystone-manage'
            log_dir = '/var/log/keystone'

        check_call([keystone_manage, 'pki_setup',
                    '--keystone-user', KEYSTONE_USER,
                    '--keystone-group', KEYSTONE_USER])

        # Ensure logfile has keystone perms since we may have just created it
        # with root.
        log_targets = (
            (log_dir, 0o744),
            ('{}/keystone.log'.format(log_dir), 0o644),
        )
        for target, mode in log_targets:
            ensure_permissions(target, user=KEYSTONE_USER,
                               group=KEYSTONE_USER, perms=mode)

    ensure_pki_dir_permissions()
def service_start(self, service_name):
    """Start ``service_name``, via Pacemaker when it is clustered.

    A service with no entry in ``self.service_to_resource_map``, or a unit
    that has no peers, is started by the parent class implementation.
    Otherwise ``crm resource start`` is run for the mapped resource; a
    "not found" failure from crm is ignored.

    :param service_name: name of the service to start.
    :raises RuntimeError: if ``crm`` fails for any reason other than
        reporting the resource as "not found".
    """
    resource = self.service_to_resource_map.get(service_name)
    if not (resource and peer_units()):
        super().service_start(service_name)
        return

    # Starting the resource makes Pacemaker start the respective service.
    # 'crm resource start' will not start the service if the resource
    # should not be running on this unit.
    start_cmd = ['crm', '--wait', 'resource', 'start', resource]
    try:
        subprocess.run(
            start_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode != CRM_EX_ERROR:
            # Unexpected exit code: report it without the stderr text.
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e

        stderr_text = e.stderr.decode('utf-8')
        if 'not found' not in stderr_text:
            raise RuntimeError(
                CRM_ERR_MSG.format(e.returncode, stderr_text)) from e
        # Resource is not known to crm: nothing to start here.
        return
def config_changed():
    """Handle a configuration change on this unit.

    Does nothing while the unit is paused. Otherwise, when there are
    sufficient peers, the config is rendered (with ``bootstrap`` set when
    the cluster is not yet bootstrapped) on the leader, or on any unit once
    the cluster is bootstrapped. Afterwards shared-db relations, the OCF
    agent, HA resources and NRPE config are refreshed.
    """
    # if we are paused, delay doing any config changed hooks. It is forced on
    # the resume.
    if is_unit_paused_set():
        return

    if config('prefer-ipv6'):
        assert_charm_supports_ipv6()

    hosts = get_cluster_hosts()
    clustered = len(hosts) > 1
    bootstrapped = is_bootstrapped()

    # NOTE: only configure the cluster if we have sufficient peers. This only
    # applies if min-cluster-size is provided and is used to avoid extraneous
    # configuration changes and premature bootstrapping as the cluster is
    # deployed.
    if is_sufficient_peers():
        # Only the leadership query is guarded, so a NotImplementedError
        # raised while rendering is not mistaken for "no leader election"
        # and cannot trigger a second render through the legacy path.
        try:
            # NOTE(jamespage): try with leadership election
            leader = is_leader()
        except NotImplementedError:
            # NOTE(jamespage): fallback to legacy behaviour.
            leader = oldest_peer(peer_units())

        if leader:
            log("Leader unit - bootstrap required=%s" % (not bootstrapped),
                DEBUG)
            render_config_restart_on_changed(clustered, hosts,
                                             bootstrap=not bootstrapped)
        elif bootstrapped:
            log("Cluster is bootstrapped - configuring mysql on this node",
                DEBUG)
            render_config_restart_on_changed(clustered, hosts)
        else:
            log("Not configuring", DEBUG)

    # Notify any changes to the access network
    update_shared_db_rels()

    # (re)install pcmkr agent
    install_mysql_ocf()

    if relation_ids('ha'):
        # make sure all the HA resources are (re)created
        ha_relation_joined()

    if is_relation_made('nrpe-external-master'):
        update_nrpe_config()
def __call__(self):
    """Build the HTTPS frontend context for the configured external ports.

    A single port given as a string is first wrapped into a list (and
    stored back on ``self.external_ports``). Returns an empty dict when no
    external ports are set or ``https()`` is false. Otherwise configures
    the cert, enables modules and returns a dict with the namespace, the
    unit's private address and a list of ``(external, internal)`` port
    tuples.
    """
    if isinstance(self.external_ports, basestring):
        self.external_ports = [self.external_ports]

    if not (self.external_ports and https()):
        return {}

    self.configure_cert()
    self.enable_modules()

    endpoints = []
    ctxt = {
        "namespace": self.service_namespace,
        "private_address": unit_get("private-address"),
        "endpoints": endpoints,
    }
    for external in self.external_ports:
        # With peers or clustering the internal port is the haproxy port,
        # otherwise it is the API port.
        if peer_units() or is_clustered():
            internal = determine_haproxy_port(external)
        else:
            internal = determine_api_port(external)
        endpoints.append((int(external), int(internal)))

    return ctxt
def initialise_pki():
    """Create certs and keys required for PKI token signing.

    ``keystone-manage pki_setup`` is only run when this unit has no peers
    or is the SSL cert master; the keystone log directory and log file
    permissions are then reset. PKI directory permissions are ensured at
    the end.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = ['keystone-manage', 'pki_setup', '--keystone-user', 'keystone',
               '--keystone-group', 'keystone']
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root. The owner must be the same 'keystone' account passed to
        # pki_setup above; the previous masked value ('******') is not a
        # valid account name.
        ensure_permissions('/var/log/keystone', user='keystone',
                           group='keystone', perms=0o744)
        ensure_permissions('/var/log/keystone/keystone.log', user='keystone',
                           group='keystone', perms=0o644)

    ensure_pki_dir_permissions()
def upgrade():
    """Handle a charm upgrade.

    On the leader (or, when leadership cannot be queried, the oldest peer),
    if the cluster is not yet marked bootstrapped and there are sufficient
    peers, the cluster state UUID is broadcast once wsrep reports ready.
    ``config_changed()`` is always run afterwards.
    """
    try:
        check_bootstrap = bool(is_leader())
    except NotImplementedError:
        # Leader election is unavailable (presumably an older Juju - confirm
        # against is_leader()); fall back to the oldest peer. Any other error
        # now propagates instead of being swallowed by a bare except.
        check_bootstrap = bool(oldest_peer(peer_units()))

    if check_bootstrap and not is_bootstrapped() and is_sufficient_peers():
        # If this is the leader but we have not yet broadcast the cluster uuid
        # then do so now.
        wsrep_ready = get_wsrep_value('wsrep_ready') or ""
        if wsrep_ready.lower() in ['on', 'ready']:
            cluster_state_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
            if cluster_state_uuid:
                mark_seeded()
                notify_bootstrapped(cluster_uuid=cluster_state_uuid)

    config_changed()
def initialise_pki():
    """Create certs and keys required for token signing.

    Used for PKI and signing token revocation list.

    The PKI cert paths are ensured first. ``keystone-manage pki_setup`` is
    only run when this unit has no peers or is the SSL cert master; the
    keystone log directory and log file permissions are then reset. PKI
    directory permissions are ensured at the end.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    ensure_pki_cert_paths()
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = ["keystone-manage", "pki_setup", "--keystone-user", "keystone",
               "--keystone-group", "keystone"]
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root. The owner must be the same "keystone" account passed to
        # pki_setup above; the previous masked value ("******") is not a
        # valid account name.
        ensure_permissions("/var/log/keystone", user="keystone",
                           group="keystone", perms=0o744)
        ensure_permissions("/var/log/keystone/keystone.log", user="keystone",
                           group="keystone", perms=0o644)

    ensure_pki_dir_permissions()
def cluster_sync_rings(peers_only=False, builders_only=False):
    """Notify peer relations that they should stop their proxy services.

    Peer units will then be expected to do a relation_set with
    stop-proxy-service-ack set rq value. Once all peers have responded, the
    leader will send out notification to all relations that rings are
    available for sync.

    If peers_only is True, only peer units will be synced. This is typically
    used when only builder files have been changed.

    If builders_only is True (and there are peers), rings are broadcast
    without storage and no stop-proxy request is sent.

    This should only be called by the leader unit; on any other unit the
    function returns without doing anything.
    """
    if not is_elected_leader(SWIFT_HA_RES):
        # Only the leader can do this.
        return

    if not peer_units():
        # If we have no peer units just go ahead and broadcast to storage
        # relations. If we have been instructed to only broadcast to peers
        # this should do nothing.
        broadcast_rings_available(get_broker_token(), peers=False,
                                  storage=not peers_only)
        return

    if builders_only:
        # No need to stop proxies if only syncing builders between peers.
        broadcast_rings_available(get_broker_token(), storage=False,
                                  builders_only=builders_only)
        return

    cluster_rids = relation_ids('cluster')
    # The identifier is only used to tag the log message below.
    request_tag = str(uuid.uuid4())
    log("Sending request to stop proxy service to all peers (%s)" %
        (request_tag), level=INFO)
    stop_request = SwiftProxyClusterRPC().stop_proxy_request(peers_only)
    for cluster_rid in cluster_rids:
        relation_set(relation_id=cluster_rid, relation_settings=stop_request)
def __call__(self):
    """Return the HTTPS frontend context for the configured external ports.

    A single port given as a string is first wrapped into a list (and
    stored back on ``self.external_ports``). An empty dict is returned when
    there are no external ports or ``https()`` is false. Otherwise the cert
    is configured, modules are enabled, and the returned dict carries the
    namespace, the unit's private address and ``(external, internal)`` port
    tuples under 'endpoints'.
    """
    if isinstance(self.external_ports, basestring):
        self.external_ports = [self.external_ports]

    if not (self.external_ports and https()):
        return {}

    self.configure_cert()
    self.enable_modules()

    ctxt = {
        'namespace': self.service_namespace,
        'private_address': unit_get('private-address'),
        'endpoints': [],
    }
    for ext_port in self.external_ports:
        # The peer/cluster state is re-checked for every port, as before.
        behind_haproxy = peer_units() or is_clustered()
        resolve_port = (determine_haproxy_port if behind_haproxy
                        else determine_api_port)
        int_port = resolve_port(ext_port)
        ctxt['endpoints'].append((int(ext_port), int(int_port)))

    return ctxt
def cluster_sync_rings(peers_only=False, builders_only=False, token=None):
    """Notify peer relations that they should stop their proxy services.

    Peer units will then be expected to do a relation_set with
    stop-proxy-service-ack set rq value. Once all peers have responded, the
    leader will send out notification to all relations that rings are
    available for sync.

    If peers_only is True, only peer units will be synced. This is typically
    used when only builder files have been changed.

    If builders_only is True (and there are peers), rings are broadcast
    without storage using ``token`` (a fresh uuid4 when none is given) and
    no stop-proxy request is sent.

    This should only be called by the leader unit; on any other unit the
    function returns without doing anything.
    """
    if not is_elected_leader(SWIFT_HA_RES):
        # Only the leader can do this.
        return

    if not peer_units():
        # If we have no peer units just go ahead and broadcast to storage
        # relations. If we have been instructed to only broadcast to peers
        # this should do nothing.
        broadcast_rings_available(broker_token=str(uuid.uuid4()),
                                  storage=not peers_only)
        return

    if builders_only:
        broker_token = token or str(uuid.uuid4())
        # No need to stop proxies if only syncing builders between peers.
        broadcast_rings_available(storage=False, builders_only=True,
                                  broker_token=broker_token)
        return

    log("Sending stop proxy service request to all peers", level=INFO)
    stop_request = SwiftProxyClusterRPC().stop_proxy_request(peers_only,
                                                             token=token)
    for cluster_rid in relation_ids('cluster'):
        relation_set(relation_id=cluster_rid, relation_settings=stop_request)
def service_restart(self, service_name):
    """Restart ``service_name``, via Pacemaker tooling when it is clustered.

    A service with no entry in ``self.service_to_resource_map``, or a unit
    that has no peers, is restarted by the parent class implementation.
    Otherwise ``crm_resource --restart`` is run for the mapped resource,
    scoped to this host with ``--node``.

    :param service_name: name of the service to restart.
    :raises RuntimeError: if ``crm_resource`` fails for any reason other
        than the resource being "not found" or not running on this node.
    """
    resource = self.service_to_resource_map.get(service_name)
    if not (resource and peer_units()):
        super().service_restart(service_name)
        return

    # crm_resource does not have a --force-restart command to do a
    # local restart, however, --node can be specified to limit the
    # scope of a restart operation to the local node. The node name
    # is the hostname present in the UTS namespace unless higher
    # precedence overrides are specified in corosync.conf.
    restart_cmd = [
        'crm_resource', '--wait', '--resource', resource,
        '--restart', '--node', socket.gethostname(),
    ]
    try:
        subprocess.run(
            restart_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode != self._crm_no_such_resource_code():
            # Unexpected exit code: report it without the stderr text.
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e

        stderr_text = e.stderr.decode('utf-8')
        tolerated = ('not found', 'is not running on')
        if not any(marker in stderr_text for marker in tolerated):
            raise RuntimeError(
                CRM_ERR_MSG.format(e.returncode, stderr_text)) from e

        # crm_resource --restart returns CRM_EX_NOSUCH when a
        # resource is not running on the specified --node. Assume
        # it is running somewhere else in the cluster and that its
        # lifetime is managed by Pacemaker (i.e. don't attempt to
        # forcefully start it locally).
        return
def test_peer_units(self):
    '''It lists all peer units for cluster relation'''
    peers = ['peer_node/1', 'peer_node/2']
    # One 'cluster' relation whose unit list is exactly the expected peers.
    self.relation_ids.return_value = ['cluster:0']
    self.relation_list.return_value = peers
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(peers, cluster_utils.peer_units())
def ha_relation_changed():
    """Configure the HA cluster from the data published on the 'ha' relation.

    Configuration is deferred (the function returns early) when corosync
    config is not yet available, there is no 'hanode' relation, there are
    fewer cluster nodes than ``cluster_count``, there is not exactly one
    'ha' relation, or that relation has no units.

    Otherwise the relation data is parsed, any needed resource-agent
    packages are installed, corosync and the cluster globals are
    configured, and - on the oldest peer unit only - pacemaker resources
    are deleted/created/cleaned up. Finally ``clustered="yes"`` is set on
    every 'ha' relation.

    :raises ValueError: if a resource uses an 'ocf:maas' agent but
        ``validate_dns_ha()`` is false.
    """
    # Check that we are related to a principle and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    hanode_rids = relation_ids('hanode')
    if not hanode_rids:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    log('Ready to form cluster - informing peers', level=DEBUG)
    relation_set(relation_id=hanode_rids[0], ready=True)

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration',
            level=INFO)
        return

    relids = relation_ids('ha')
    if len(relids) != 1:  # Should only ever be one of these
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    # Obtain relation information
    relid = relids[0]
    units = related_units(relid)
    if not units:
        log('No principle unit found, deferring configuration',
            level=INFO)
        return

    unit = units[0]
    log('Parsing cluster configuration using rid: %s, unit: %s' %
        (relid, unit), level=DEBUG)
    resources = parse_data(relid, unit, 'resources')
    delete_resources = parse_data(relid, unit, 'delete_resources')
    resource_params = parse_data(relid, unit, 'resource_params')
    groups = parse_data(relid, unit, 'groups')
    ms = parse_data(relid, unit, 'ms')
    orders = parse_data(relid, unit, 'orders')
    colocations = parse_data(relid, unit, 'colocations')
    clones = parse_data(relid, unit, 'clones')
    locations = parse_data(relid, unit, 'locations')
    init_services = parse_data(relid, unit, 'init_services')

    # .values()/.items() are used throughout instead of the Python 2 only
    # .itervalues()/.iteritems() so the hook runs on either interpreter.
    if any(ra.startswith('ocf:openstack') for ra in resources.values()):
        apt_install('openstack-resource-agents')
    if any(ra.startswith('ocf:ceph') for ra in resources.values()):
        apt_install('ceph-resource-agents')

    if any(ra.startswith('ocf:maas') for ra in resources.values()):
        if validate_dns_ha():
            log('Setting up access to MAAS API', level=INFO)
            setup_maas_api()
            # Update resource_parms for DNS resources to include MAAS URL and
            # credentials
            for resource in resource_params.keys():
                if resource.endswith("_hostname"):
                    resource_params[resource] += (
                        ' maas_url="{}" maas_credentials="{}"'
                        ''.format(config('maas_url'),
                                  config('maas_credentials')))
        else:
            msg = ("DNS HA is requested but maas_url "
                   "or maas_credentials are not set")
            status_set('blocked', msg)
            raise ValueError(msg)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    pcmk.wait_for_pcmk()
    configure_cluster_global()
    configure_monitor_host()
    configure_stonith()

    # Only configure the cluster resources
    # from the oldest peer unit.
    if oldest_peer(peer_units()):
        # The format string previously had no %s placeholder, so the
        # resources were never logged; the 1-tuple keeps formatting safe
        # whatever container type delete_resources is.
        log('Deleting Resources: %s' % (delete_resources,), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleanuping and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                # Daemon process may still be running after the upgrade.
                kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.items():
            # disable the service we are going to put in HA
            res_type_parts = res_type.split(':')
            if res_type_parts[0] == "lsb":
                lsb_service = res_type_parts[1]
                disable_lsb_services(lsb_service)
                if service_running(lsb_service):
                    service_stop(lsb_service)
            elif (len(init_services) != 0 and
                  res_name in init_services and
                  init_services[res_name]):
                disable_upstart_services(init_services[res_name])
                if service_running(init_services[res_name]):
                    service_stop(init_services[res_name])

            # Put the services in HA, if not already done so
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)

        log('Configuring Groups: %s' % (groups), level=DEBUG)
        _ha_configure_crm_entries('group', groups)

        log('Configuring Master/Slave (ms): %s' % (ms), level=DEBUG)
        _ha_configure_crm_entries('ms', ms)

        log('Configuring Orders: %s' % (orders), level=DEBUG)
        _ha_configure_crm_entries('order', orders)

        log('Configuring Colocations: %s' % colocations, level=DEBUG)
        _ha_configure_crm_entries('colocation', colocations)

        log('Configuring Clones: %s' % clones, level=DEBUG)
        _ha_configure_crm_entries('clone', clones)

        log('Configuring Locations: %s' % locations, level=DEBUG)
        _ha_configure_crm_entries('location', locations)

        for res_name in resources:
            if len(init_services) != 0 and res_name in init_services:
                # Checks that the resources are running and started.
                # Ensure that clones are excluded as the resource is
                # not directly controllable (dealt with below)
                # Ensure that groups are cleaned up as a whole rather
                # than as individual resources.
                if (res_name not in clones.values() and
                        res_name not in groups.values() and
                        not pcmk.crm_res_running(res_name)):
                    # Just in case, cleanup the resources to ensure they get
                    # started in case they failed for some unrelated reason.
                    pcmk.commit('crm resource cleanup %s' % res_name)

        for cl_name in clones:
            # Always cleanup clones
            pcmk.commit('crm resource cleanup %s' % cl_name)

        for grp_name in groups:
            # Always cleanup groups
            pcmk.commit('crm resource cleanup %s' % grp_name)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")


def _ha_configure_crm_entries(kind, entries):
    """Create each crm ``kind`` object in ``entries`` that does not exist yet.

    :param kind: crm configure sub-command, e.g. 'group' or 'order'.
    :param entries: mapping of object name to its crm parameter string.
    """
    for name, params in entries.items():
        if not pcmk.crm_opt_exists(name):
            cmd = 'crm -w -F configure %s %s %s' % (kind, name, params)
            pcmk.commit(cmd)
            log('%s' % cmd, level=DEBUG)