def ha_relation_changed():
    """Configure corosync/pacemaker from data published on the HA relation.

    The function returns early (deferring configuration) when any of the
    following holds:

    * no corosync configuration is available yet,
    * there is no 'hanode' peer relation,
    * fewer cluster nodes than ``config('cluster_count')`` are known,
    * there is not exactly one 'ha' (or, failing that, 'juju-info')
      relation, or that relation has no related unit,
    * MAAS resources are requested but ``validate_dns_ha()`` raises
      ``MAASConfigIncomplete`` (workload status is set to 'blocked').

    Otherwise corosync and the global cluster options are configured and,
    when ``is_leader()`` is true, the pacemaker resources, groups, ms,
    orders, clones, colocations and locations described by the related
    unit are created through ``crm``.  Finally 'clustered' is set on every
    'ha' relation and 'member_ready' on every 'hanode' relation.

    :raises Exception: if ``pcmk.crm_update_resource`` returns a non-zero
        code for a resource that already exists.
    """
    # Check that we are related to a principle and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    hanode_rids = relation_ids('hanode')
    if hanode_rids:
        log('Ready to form cluster - informing peers', level=DEBUG)
        relation_set(relation_id=hanode_rids[0], ready=True)
    else:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration',
            level=INFO)
        return

    relids = relation_ids('ha') or relation_ids('juju-info')
    if len(relids) != 1:
        # Should only ever be one of these
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    # Obtain relation information
    relid = relids[0]
    units = related_units(relid)
    if not units:
        log('No principle unit found, deferring configuration',
            level=INFO)
        return

    unit = units[0]
    log('Parsing cluster configuration using rid: %s, unit: %s' %
        (relid, unit), level=DEBUG)
    resources = parse_data(relid, unit, 'resources')
    delete_resources = parse_data(relid, unit, 'delete_resources')
    resource_params = parse_data(relid, unit, 'resource_params')
    groups = parse_data(relid, unit, 'groups')
    ms = parse_data(relid, unit, 'ms')
    orders = parse_data(relid, unit, 'orders')
    colocations = parse_data(relid, unit, 'colocations')
    clones = parse_data(relid, unit, 'clones')
    locations = parse_data(relid, unit, 'locations')
    init_services = parse_data(relid, unit, 'init_services')

    # Install the resource agent packages required by the requested
    # resource types.
    if any(ra.startswith('ocf:openstack') for ra in resources.values()):
        apt_install('openstack-resource-agents')
    if any(ra.startswith('ocf:ceph') for ra in resources.values()):
        apt_install('ceph-resource-agents')

    if any(ra.startswith('ocf:maas') for ra in resources.values()):
        try:
            validate_dns_ha()
        except MAASConfigIncomplete as ex:
            log(ex.args[0], level=ERROR)
            status_set('blocked', ex.args[0])
            # if an exception is raised the hook will end up in error state
            # which will obfuscate the workload status and message.
            return

        log('Setting up access to MAAS API', level=INFO)
        setup_maas_api()
        # Update resource_parms for DNS resources to include MAAS URL and
        # credentials
        for resource in resource_params:
            if resource.endswith("_hostname"):
                # The IP address is extracted before the MAAS options are
                # appended to the parameters.
                res_ipaddr = get_ip_addr_from_resource_params(
                    resource_params[resource])
                resource_params[resource] += (
                    ' maas_url="{}" maas_credentials="{}"'
                    ''.format(config('maas_url'),
                              config('maas_credentials')))
                write_maas_dns_address(resource, res_ipaddr)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    try_pcmk_wait()
    failure_timeout = config('failure_timeout')
    configure_cluster_global(failure_timeout)
    configure_monitor_host()
    configure_stonith()

    # Only configure the cluster resources
    # from the oldest peer unit.
    if is_leader():
        log('Setting cluster symmetry', level=INFO)
        set_cluster_symmetry()

        # The argument is wrapped in a 1-tuple so that any container type
        # (including a tuple) is rendered as a single value.
        log('Deleting Resources: %s' % (delete_resources,), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleanuping and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                    # Daemon process may still be running after the upgrade.
                    kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.items():
            # disable the service we are going to put in HA
            type_parts = res_type.split(':')
            if type_parts[0] == "lsb":
                lsb_service = type_parts[1]
                disable_lsb_services(lsb_service)
                if service_running(lsb_service):
                    service_stop(lsb_service)
            elif res_name in init_services and init_services[res_name]:
                init_service = init_services[res_name]
                disable_upstart_services(init_service)
                if service_running(init_service):
                    service_stop(init_service)

            # Put the services in HA, if not already done so
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)
            else:
                # the resource already exists so it will be updated.
                code = pcmk.crm_update_resource(res_name, res_type,
                                                resource_params.get(res_name))
                if code != 0:
                    msg = "Cannot update pcmkr resource: {}".format(res_name)
                    status_set('blocked', msg)
                    raise Exception(msg)

        # Ordering is important here, colocation and location constraints
        # reference resources. All resources referenced by the constraints
        # need to exist otherwise constraint creation will fail.
        crm_objects = (
            ('Groups', 'group', groups),
            ('Master/Slave (ms)', 'ms', ms),
            ('Orders', 'order', orders),
            ('Clones', 'clone', clones),
            ('Colocations', 'colocation', colocations),
            ('Locations', 'location', locations),
        )
        for label, crm_kind, entries in crm_objects:
            log('Configuring %s: %s' % (label, entries), level=DEBUG)
            for obj_name, obj_params in entries.items():
                # Only objects that do not exist yet are created.
                if not pcmk.crm_opt_exists(obj_name):
                    cmd = 'crm -w -F configure %s %s %s' % (crm_kind,
                                                            obj_name,
                                                            obj_params)
                    pcmk.commit(cmd)
                    log('%s' % cmd, level=DEBUG)

        for res_name in resources:
            if res_name in init_services:
                # Checks that the resources are running and started.
                # Ensure that clones are excluded as the resource is
                # not directly controllable (dealt with below)
                # Ensure that groups are cleaned up as a whole rather
                # than as individual resources.
                if (res_name not in clones.values() and
                        res_name not in groups.values() and
                        not pcmk.crm_res_running(res_name)):
                    # Just in case, cleanup the resources to ensure they get
                    # started in case they failed for some unrelated reason.
                    cmd = 'crm resource cleanup %s' % res_name
                    pcmk.commit(cmd)

        for cl_name in clones:
            # Always cleanup clones
            cmd = 'crm resource cleanup %s' % cl_name
            pcmk.commit(cmd)

        for grp_name in groups:
            # Always cleanup groups
            cmd = 'crm resource cleanup %s' % grp_name
            pcmk.commit(cmd)

        # All members of the cluster need to be registered before resources
        # that reference them can be created.
        if len(get_member_ready_nodes()) >= int(config('cluster_count')):
            log('Configuring any remote nodes', level=INFO)
            remote_resources = configure_pacemaker_remote_resources()
            stonith_resource = configure_pacemaker_remote_stonith_resource()
            resources.update(remote_resources)
            resources.update(stonith_resource)
            configure_resources_on_remotes(
                resources=resources,
                clones=clones,
                groups=groups)
        else:
            log('Deferring configuration of any remote nodes', level=INFO)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")

    # Inform peers that local configuration is complete and this member
    # is ready
    for rel_id in relation_ids('hanode'):
        relation_set(relation_id=rel_id, member_ready=True)
def test_get_member_ready_nodes(self, get_node_flags):
    """Check that get_member_ready_nodes looks up the 'member_ready' flag.

    NOTE(review): get_node_flags is presumably a mock injected by a patch
    decorator that is not visible in this excerpt - confirm.
    """
    wanted_flag = 'member_ready'
    utils.get_member_ready_nodes()
    # Exactly one lookup, for the expected flag name.
    get_node_flags.assert_called_once_with(wanted_flag)
def ha_relation_changed():
    """Configure corosync/pacemaker from data published on the HA relation.

    The function returns early (deferring configuration) when any of the
    following holds:

    * no corosync configuration is available yet,
    * there is no 'hanode' peer relation,
    * fewer cluster nodes than ``config('cluster_count')`` are known,
    * there is not exactly one 'ha' (or, failing that, 'juju-info')
      relation, or that relation has no related unit,
    * MAAS resources are requested but ``validate_dns_ha()`` raises
      ``MAASConfigIncomplete`` (workload status is set to 'blocked').

    Otherwise corosync and the global cluster options are configured and,
    when ``is_leader()`` is true, the pacemaker resources, groups, ms,
    orders, clones, colocations and locations described by the related
    unit are created through ``crm``.  Finally 'clustered' is set on every
    'ha' relation and 'member_ready' on every 'hanode' relation.

    :raises Exception: if ``pcmk.crm_update_resource`` returns a non-zero
        code for a resource that already exists.
    """
    # Check that we are related to a principle and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    hanode_rids = relation_ids('hanode')
    if hanode_rids:
        log('Ready to form cluster - informing peers', level=DEBUG)
        relation_set(relation_id=hanode_rids[0], ready=True)
    else:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration',
            level=INFO)
        return

    relids = relation_ids('ha') or relation_ids('juju-info')
    if len(relids) != 1:
        # Should only ever be one of these
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    # Obtain relation information
    relid = relids[0]
    units = related_units(relid)
    if not units:
        log('No principle unit found, deferring configuration',
            level=INFO)
        return

    unit = units[0]
    log('Parsing cluster configuration using rid: %s, unit: %s' %
        (relid, unit), level=DEBUG)
    resources = parse_data(relid, unit, 'resources')
    delete_resources = parse_data(relid, unit, 'delete_resources')
    resource_params = parse_data(relid, unit, 'resource_params')
    groups = parse_data(relid, unit, 'groups')
    ms = parse_data(relid, unit, 'ms')
    orders = parse_data(relid, unit, 'orders')
    colocations = parse_data(relid, unit, 'colocations')
    clones = parse_data(relid, unit, 'clones')
    locations = parse_data(relid, unit, 'locations')
    init_services = parse_data(relid, unit, 'init_services')

    # Install the resource agent packages required by the requested
    # resource types.
    if any(ra.startswith('ocf:openstack') for ra in resources.values()):
        apt_install('openstack-resource-agents')
    if any(ra.startswith('ocf:ceph') for ra in resources.values()):
        apt_install('ceph-resource-agents')

    if any(ra.startswith('ocf:maas') for ra in resources.values()):
        try:
            validate_dns_ha()
        except MAASConfigIncomplete as ex:
            log(ex.args[0], level=ERROR)
            status_set('blocked', ex.args[0])
            # if an exception is raised the hook will end up in error state
            # which will obfuscate the workload status and message.
            return

        log('Setting up access to MAAS API', level=INFO)
        setup_maas_api()
        # Update resource_parms for DNS resources to include MAAS URL and
        # credentials
        for resource in resource_params:
            if resource.endswith("_hostname"):
                # The IP address is extracted before the MAAS options are
                # appended to the parameters.
                res_ipaddr = get_ip_addr_from_resource_params(
                    resource_params[resource])
                resource_params[resource] += (
                    ' maas_url="{}" maas_credentials="{}"'
                    ''.format(config('maas_url'),
                              config('maas_credentials')))
                write_maas_dns_address(resource, res_ipaddr)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    try_pcmk_wait()
    configure_cluster_global()
    configure_monitor_host()
    configure_stonith()

    # Only configure the cluster resources
    # from the oldest peer unit.
    if is_leader():
        log('Setting cluster symmetry', level=INFO)
        set_cluster_symmetry()

        # The argument is wrapped in a 1-tuple so that any container type
        # (including a tuple) is rendered as a single value.
        log('Deleting Resources: %s' % (delete_resources,), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleanuping and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                    # Daemon process may still be running after the upgrade.
                    kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.items():
            # disable the service we are going to put in HA
            type_parts = res_type.split(':')
            if type_parts[0] == "lsb":
                lsb_service = type_parts[1]
                disable_lsb_services(lsb_service)
                if service_running(lsb_service):
                    service_stop(lsb_service)
            elif res_name in init_services and init_services[res_name]:
                init_service = init_services[res_name]
                disable_upstart_services(init_service)
                if service_running(init_service):
                    service_stop(init_service)

            # Put the services in HA, if not already done so
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)
            else:
                # the resource already exists so it will be updated.
                code = pcmk.crm_update_resource(res_name, res_type,
                                                resource_params.get(res_name))
                if code != 0:
                    msg = "Cannot update pcmkr resource: {}".format(res_name)
                    status_set('blocked', msg)
                    raise Exception(msg)

        # Ordering is important here, colocation and location constraints
        # reference resources. All resources referenced by the constraints
        # need to exist otherwise constraint creation will fail.
        crm_objects = (
            ('Groups', 'group', groups),
            ('Master/Slave (ms)', 'ms', ms),
            ('Orders', 'order', orders),
            ('Clones', 'clone', clones),
            ('Colocations', 'colocation', colocations),
            ('Locations', 'location', locations),
        )
        for label, crm_kind, entries in crm_objects:
            log('Configuring %s: %s' % (label, entries), level=DEBUG)
            for obj_name, obj_params in entries.items():
                # Only objects that do not exist yet are created.
                if not pcmk.crm_opt_exists(obj_name):
                    cmd = 'crm -w -F configure %s %s %s' % (crm_kind,
                                                            obj_name,
                                                            obj_params)
                    pcmk.commit(cmd)
                    log('%s' % cmd, level=DEBUG)

        for res_name in resources:
            if res_name in init_services:
                # Checks that the resources are running and started.
                # Ensure that clones are excluded as the resource is
                # not directly controllable (dealt with below)
                # Ensure that groups are cleaned up as a whole rather
                # than as individual resources.
                if (res_name not in clones.values() and
                        res_name not in groups.values() and
                        not pcmk.crm_res_running(res_name)):
                    # Just in case, cleanup the resources to ensure they get
                    # started in case they failed for some unrelated reason.
                    cmd = 'crm resource cleanup %s' % res_name
                    pcmk.commit(cmd)

        for cl_name in clones:
            # Always cleanup clones
            cmd = 'crm resource cleanup %s' % cl_name
            pcmk.commit(cmd)

        for grp_name in groups:
            # Always cleanup groups
            cmd = 'crm resource cleanup %s' % grp_name
            pcmk.commit(cmd)

        # All members of the cluster need to be registered before resources
        # that reference them can be created.
        if len(get_member_ready_nodes()) >= int(config('cluster_count')):
            log('Configuring any remote nodes', level=INFO)
            remote_resources = configure_pacemaker_remote_resources()
            stonith_resource = configure_pacemaker_remote_stonith_resource()
            resources.update(remote_resources)
            resources.update(stonith_resource)
            configure_resources_on_remotes(
                resources=resources,
                clones=clones,
                groups=groups)
        else:
            log('Deferring configuration of any remote nodes', level=INFO)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")

    # Inform peers that local configuration is complete and this member
    # is ready
    for rel_id in relation_ids('hanode'):
        relation_set(relation_id=rel_id, member_ready=True)