def test_configure_pacemaker_remote_resources(
        self,
        cleanup_remote_nodes,
        configure_maas_stonith_resource,
        configure_pacemaker_remote,
        add_location_rules_for_local_nodes,
        relation_ids,
        related_units,
        relation_get):
    """Exercise utils.configure_pacemaker_remote_resources with fake data.

    Three units are published on a single 'pacemaker-remote:49' relation;
    only the first two carry both 'remote-hostname' and 'remote-ip'. The
    test expects configure_pacemaker_remote to have been called for node1
    and node2, and cleanup_remote_nodes to have been called exactly once
    with the resource names returned by the stubbed
    configure_pacemaker_remote.
    """
    relation_id = 'pacemaker-remote:49'
    # The values carry embedded double quotes; presumably they are
    # JSON-encoded on the relation and decoded by the code under test
    # -- TODO confirm against utils.
    rdata = {
        relation_id: {
            'pacemaker-remote/0': {
                'remote-hostname': '"node1"',
                'remote-ip': '"10.0.0.10"',
                'stonith-hostname': '"st-node1"',
            },
            'pacemaker-remote/1': {
                'remote-ip': '"10.0.0.11"',
                'remote-hostname': '"node2"',
            },
            'pacemaker-remote/2': {
                'stonith-hostname': '"st-node3"',
            },
        },
    }

    def fake_relation_ids(reltype):
        # Every relation name resolves to the single fake relation.
        return rdata.keys()

    def fake_related_units(relid):
        return sorted(rdata[relid])

    def fake_relation_get(attribute, unit, relid):
        # Missing attributes read back as None.
        return rdata[relid][unit].get(attribute)

    def fake_configure_pacemaker_remote(hostname, address):
        # Derive a predictable resource name from the remote hostname.
        return 'res-{}'.format(hostname)

    relation_ids.side_effect = fake_relation_ids
    related_units.side_effect = fake_related_units
    relation_get.side_effect = fake_relation_get
    configure_pacemaker_remote.side_effect = fake_configure_pacemaker_remote

    utils.configure_pacemaker_remote_resources()

    expected_remote_calls = [
        mock.call('node1', '10.0.0.10'),
        mock.call('node2', '10.0.0.11'),
    ]
    configure_pacemaker_remote.assert_has_calls(
        expected_remote_calls, any_order=True)
    cleanup_remote_nodes.assert_called_once_with(
        ['res-node1', 'res-node2'])
Ejemplo n.º 2
0
 def test_configure_pacemaker_remote_resources(
         self, cleanup_remote_nodes, configure_maas_stonith_resource,
         configure_pacemaker_remote, add_location_rules_for_local_nodes,
         relation_ids, related_units, relation_get):
     """Check which remote units get a pacemaker remote resource.

     The fake 'pacemaker-remote:49' relation has three units: two that
     publish both a hostname and an IP, and one that publishes only a
     stonith hostname. After calling
     utils.configure_pacemaker_remote_resources() the test asserts that
     configure_pacemaker_remote was called for node1 and node2 and that
     cleanup_remote_nodes was called once with the two resource names
     produced by the stub.
     """
     # Per-unit relation settings. The embedded double quotes are part of
     # the stored values (presumably JSON-encoded -- TODO confirm).
     unit_with_stonith = {
         'remote-hostname': '"node1"',
         'remote-ip': '"10.0.0.10"',
         'stonith-hostname': '"st-node1"',
     }
     unit_without_stonith = {
         'remote-ip': '"10.0.0.11"',
         'remote-hostname': '"node2"',
     }
     unit_stonith_only = {
         'stonith-hostname': '"st-node3"',
     }
     rdata = {
         'pacemaker-remote:49': {
             'pacemaker-remote/0': unit_with_stonith,
             'pacemaker-remote/1': unit_without_stonith,
             'pacemaker-remote/2': unit_stonith_only,
         },
     }

     def list_relation_ids(reltype):
         return rdata.keys()

     def list_units(relid):
         return sorted(rdata[relid])

     def read_setting(attribute, unit, relid):
         return rdata[relid][unit].get(attribute)

     def name_resource(hostname, address):
         return 'res-{}'.format(hostname)

     relation_ids.side_effect = list_relation_ids
     related_units.side_effect = list_units
     relation_get.side_effect = read_setting
     configure_pacemaker_remote.side_effect = name_resource

     utils.configure_pacemaker_remote_resources()

     # (hostname, ip) pairs expected to be configured as remote nodes.
     expected_remotes = [('node1', '10.0.0.10'), ('node2', '10.0.0.11')]
     configure_pacemaker_remote.assert_has_calls(
         [mock.call(host, ip) for host, ip in expected_remotes],
         any_order=True)
     cleanup_remote_nodes.assert_called_once_with(
         ['res-{}'.format(host) for host, _ in expected_remotes])
Ejemplo n.º 3
0
def ha_relation_changed():
    """Configure the cluster from the data published on the ha relation.

    Configuration is deferred (the function logs and returns early) when:

    * get_corosync_conf() returns a falsy value;
    * there is no 'hanode' relation yet;
    * fewer nodes than the 'cluster_count' config option are known;
    * there is not exactly one 'ha' (or, failing that, 'juju-info')
      relation, or that relation has no units;
    * a resource uses an 'ocf:maas' agent and validate_dns_ha() raises
      MAASConfigIncomplete, in which case the unit status is also set to
      'blocked'.

    Otherwise corosync, the global cluster options, the monitor host and
    stonith are configured on every unit. When is_leader() is true the
    unit additionally deletes the resources listed under
    'delete_resources', creates or updates the requested primitives,
    creates missing groups, ms, orders, clones, colocations and
    locations, runs resource cleanups and, once enough members are ready,
    configures the pacemaker remote resources. Finally 'clustered' is set
    on the 'ha' relations and 'member_ready' on the 'hanode' relations.

    :raises Exception: if pcmk.crm_update_resource() returns a non-zero
        code for a resource that already exists.
    """
    # Check that we are related to a principle and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    if relation_ids('hanode'):
        log('Ready to form cluster - informing peers', level=DEBUG)
        relation_set(relation_id=relation_ids('hanode')[0], ready=True)
    else:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration', level=INFO)
        return

    relids = relation_ids('ha') or relation_ids('juju-info')
    if len(relids) != 1:  # Should only ever be one of these
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    # Obtain relation information
    relid = relids[0]
    units = related_units(relid)
    if not units:
        log('No principle unit found, deferring configuration', level=INFO)
        return

    unit = units[0]
    log('Parsing cluster configuration using rid: %s, unit: %s' %
        (relid, unit),
        level=DEBUG)
    resources = parse_data(relid, unit, 'resources')
    delete_resources = parse_data(relid, unit, 'delete_resources')
    resource_params = parse_data(relid, unit, 'resource_params')
    groups = parse_data(relid, unit, 'groups')
    ms = parse_data(relid, unit, 'ms')
    orders = parse_data(relid, unit, 'orders')
    colocations = parse_data(relid, unit, 'colocations')
    clones = parse_data(relid, unit, 'clones')
    locations = parse_data(relid, unit, 'locations')
    init_services = parse_data(relid, unit, 'init_services')

    # Install the agent packages required by the requested resource types.
    resource_agents = list(resources.values())
    if any(ra.startswith('ocf:openstack') for ra in resource_agents):
        apt_install('openstack-resource-agents')
    if any(ra.startswith('ocf:ceph') for ra in resource_agents):
        apt_install('ceph-resource-agents')

    if any(ra.startswith('ocf:maas') for ra in resource_agents):
        try:
            validate_dns_ha()
        except MAASConfigIncomplete as ex:
            log(ex.args[0], level=ERROR)
            status_set('blocked', ex.args[0])
            # if an exception is raised the hook will end up in error state
            # which will obfuscate the workload status and message.
            return

        log('Setting up access to MAAS API', level=INFO)
        setup_maas_api()
        # Update resource_parms for DNS resources to include MAAS URL and
        # credentials
        for resource in resource_params:
            if resource.endswith("_hostname"):
                # The IP is read before the MAAS options are appended.
                res_ipaddr = get_ip_addr_from_resource_params(
                    resource_params[resource])
                resource_params[resource] += (
                    ' maas_url="{}" maas_credentials="{}"'
                    ''.format(config('maas_url'), config('maas_credentials')))
                write_maas_dns_address(resource, res_ipaddr)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    try_pcmk_wait()
    failure_timeout = config('failure_timeout')
    configure_cluster_global(failure_timeout)
    configure_monitor_host()
    configure_stonith()

    # Only the unit for which is_leader() is true configures the
    # cluster resources.
    if is_leader():
        log('Setting cluster symmetry', level=INFO)
        set_cluster_symmetry()
        # The format string previously had no conversion specifier, so the
        # resources were never logged and a non-empty tuple would have
        # raised TypeError. Wrapping in a 1-tuple formats any value safely.
        log('Deleting Resources: %s' % (delete_resources,), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleanuping and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                # Daemon process may still be running after the upgrade.
                kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.items():
            # disable the service we are going to put in HA
            agent_parts = res_type.split(':')
            if agent_parts[0] == "lsb":
                lsb_service = agent_parts[1]
                disable_lsb_services(lsb_service)
                if service_running(lsb_service):
                    service_stop(lsb_service)
            elif res_name in init_services and init_services[res_name]:
                init_service = init_services[res_name]
                disable_upstart_services(init_service)
                if service_running(init_service):
                    service_stop(init_service)
            # Put the services in HA, if not already done so
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)

            else:
                # the resource already exists so it will be updated.
                code = pcmk.crm_update_resource(res_name, res_type,
                                                resource_params.get(res_name))
                if code != 0:
                    msg = "Cannot update pcmkr resource: {}".format(res_name)
                    status_set('blocked', msg)
                    raise Exception(msg)

        # Ordering is important here, colocation and location constraints
        # reference resources. All resources referenced by the constraints
        # need to exist otherwise constraint creation will fail.
        # Each entry is (log format, crm object kind, {name: params}).
        crm_objects = (
            ('Configuring Groups: %s', 'group', groups),
            ('Configuring Master/Slave (ms): %s', 'ms', ms),
            ('Configuring Orders: %s', 'order', orders),
            ('Configuring Clones: %s', 'clone', clones),
            ('Configuring Colocations: %s', 'colocation', colocations),
            ('Configuring Locations: %s', 'location', locations),
        )
        for log_fmt, crm_kind, objects in crm_objects:
            log(log_fmt % (objects,), level=DEBUG)
            for obj_name, obj_params in objects.items():
                # Objects that already exist are left untouched.
                if not pcmk.crm_opt_exists(obj_name):
                    cmd = ('crm -w -F configure %s %s %s' %
                           (crm_kind, obj_name, obj_params))
                    pcmk.commit(cmd)
                    log('%s' % cmd, level=DEBUG)

        for res_name in resources:
            # Checks that the resources are running and started.
            # Ensure that clones are excluded as the resource is
            # not directly controllable (dealt with below)
            # Ensure that groups are cleaned up as a whole rather
            # than as individual resources.
            if (res_name in init_services
                    and res_name not in clones.values()
                    and res_name not in groups.values()
                    and not pcmk.crm_res_running(res_name)):
                # Just in case, cleanup the resources to ensure they get
                # started in case they failed for some unrelated reason.
                pcmk.commit('crm resource cleanup %s' % res_name)

        for cl_name in clones:
            # Always cleanup clones
            pcmk.commit('crm resource cleanup %s' % cl_name)

        for grp_name in groups:
            # Always cleanup groups
            pcmk.commit('crm resource cleanup %s' % grp_name)

        # All members of the cluster need to be registered before resources
        # that reference them can be created.
        if len(get_member_ready_nodes()) >= int(config('cluster_count')):
            log('Configuring any remote nodes', level=INFO)
            remote_resources = configure_pacemaker_remote_resources()
            stonith_resource = configure_pacemaker_remote_stonith_resource()
            resources.update(remote_resources)
            resources.update(stonith_resource)
            configure_resources_on_remotes(resources=resources,
                                           clones=clones,
                                           groups=groups)
        else:
            log('Deferring configuration of any remote nodes', level=INFO)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")

    # Inform peers that local configuration is complete and this member
    # is ready
    for rel_id in relation_ids('hanode'):
        relation_set(relation_id=rel_id, member_ready=True)
Ejemplo n.º 4
0
def ha_relation_changed():
    """Configure the cluster from the data published on the ha relation.

    Configuration is deferred (the function logs and returns early) when:

    * get_corosync_conf() returns a falsy value;
    * there is no 'hanode' relation yet;
    * fewer nodes than the 'cluster_count' config option are known;
    * there is not exactly one 'ha' (or, failing that, 'juju-info')
      relation, or that relation has no units;
    * a resource uses an 'ocf:maas' agent and validate_dns_ha() raises
      MAASConfigIncomplete, in which case the unit status is also set to
      'blocked'.

    Otherwise corosync, the global cluster options, the monitor host and
    stonith are configured on every unit. When is_leader() is true the
    unit additionally deletes the resources listed under
    'delete_resources', creates or updates the requested primitives,
    creates missing groups, ms, orders, clones, colocations and
    locations, runs resource cleanups and, once enough members are ready,
    configures the pacemaker remote resources. Finally 'clustered' is set
    on the 'ha' relations and 'member_ready' on the 'hanode' relations.

    :raises Exception: if pcmk.crm_update_resource() returns a non-zero
        code for a resource that already exists.
    """
    # Check that we are related to a principle and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    if relation_ids('hanode'):
        log('Ready to form cluster - informing peers', level=DEBUG)
        relation_set(relation_id=relation_ids('hanode')[0], ready=True)
    else:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration',
            level=INFO)
        return

    relids = relation_ids('ha') or relation_ids('juju-info')
    if len(relids) != 1:  # Should only ever be one of these
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    # Obtain relation information
    relid = relids[0]
    units = related_units(relid)
    if not units:
        log('No principle unit found, deferring configuration',
            level=INFO)
        return

    unit = units[0]
    log('Parsing cluster configuration using rid: %s, unit: %s' %
        (relid, unit), level=DEBUG)
    resources = parse_data(relid, unit, 'resources')
    delete_resources = parse_data(relid, unit, 'delete_resources')
    resource_params = parse_data(relid, unit, 'resource_params')
    groups = parse_data(relid, unit, 'groups')
    ms = parse_data(relid, unit, 'ms')
    orders = parse_data(relid, unit, 'orders')
    colocations = parse_data(relid, unit, 'colocations')
    clones = parse_data(relid, unit, 'clones')
    locations = parse_data(relid, unit, 'locations')
    init_services = parse_data(relid, unit, 'init_services')

    # Install the agent packages required by the requested resource types.
    resource_agents = list(resources.values())
    if any(ra.startswith('ocf:openstack') for ra in resource_agents):
        apt_install('openstack-resource-agents')
    if any(ra.startswith('ocf:ceph') for ra in resource_agents):
        apt_install('ceph-resource-agents')

    if any(ra.startswith('ocf:maas') for ra in resource_agents):
        try:
            validate_dns_ha()
        except MAASConfigIncomplete as ex:
            log(ex.args[0], level=ERROR)
            status_set('blocked', ex.args[0])
            # if an exception is raised the hook will end up in error state
            # which will obfuscate the workload status and message.
            return

        log('Setting up access to MAAS API', level=INFO)
        setup_maas_api()
        # Update resource_parms for DNS resources to include MAAS URL and
        # credentials
        for resource in resource_params:
            if resource.endswith("_hostname"):
                # The IP is read before the MAAS options are appended.
                res_ipaddr = get_ip_addr_from_resource_params(
                    resource_params[resource])
                resource_params[resource] += (
                    ' maas_url="{}" maas_credentials="{}"'
                    ''.format(config('maas_url'),
                              config('maas_credentials')))
                write_maas_dns_address(resource, res_ipaddr)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    try_pcmk_wait()
    configure_cluster_global()
    configure_monitor_host()
    configure_stonith()

    # Only the unit for which is_leader() is true configures the
    # cluster resources.
    if is_leader():
        log('Setting cluster symmetry', level=INFO)
        set_cluster_symmetry()
        # The format string previously had no conversion specifier, so the
        # resources were never logged and a non-empty tuple would have
        # raised TypeError. Wrapping in a 1-tuple formats any value safely.
        log('Deleting Resources: %s' % (delete_resources,), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleanuping and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                # Daemon process may still be running after the upgrade.
                kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.items():
            # disable the service we are going to put in HA
            agent_parts = res_type.split(':')
            if agent_parts[0] == "lsb":
                lsb_service = agent_parts[1]
                disable_lsb_services(lsb_service)
                if service_running(lsb_service):
                    service_stop(lsb_service)
            elif res_name in init_services and init_services[res_name]:
                init_service = init_services[res_name]
                disable_upstart_services(init_service)
                if service_running(init_service):
                    service_stop(init_service)
            # Put the services in HA, if not already done so
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)

            else:
                # the resource already exists so it will be updated.
                code = pcmk.crm_update_resource(res_name, res_type,
                                                resource_params.get(res_name))
                if code != 0:
                    msg = "Cannot update pcmkr resource: {}".format(res_name)
                    status_set('blocked', msg)
                    raise Exception(msg)

        # Ordering is important here, colocation and location constraints
        # reference resources. All resources referenced by the constraints
        # need to exist otherwise constraint creation will fail.
        # Each entry is (log format, crm object kind, {name: params}).
        crm_objects = (
            ('Configuring Groups: %s', 'group', groups),
            ('Configuring Master/Slave (ms): %s', 'ms', ms),
            ('Configuring Orders: %s', 'order', orders),
            ('Configuring Clones: %s', 'clone', clones),
            ('Configuring Colocations: %s', 'colocation', colocations),
            ('Configuring Locations: %s', 'location', locations),
        )
        for log_fmt, crm_kind, objects in crm_objects:
            log(log_fmt % (objects,), level=DEBUG)
            for obj_name, obj_params in objects.items():
                # Objects that already exist are left untouched.
                if not pcmk.crm_opt_exists(obj_name):
                    cmd = ('crm -w -F configure %s %s %s' %
                           (crm_kind, obj_name, obj_params))
                    pcmk.commit(cmd)
                    log('%s' % cmd, level=DEBUG)

        for res_name in resources:
            # Checks that the resources are running and started.
            # Ensure that clones are excluded as the resource is
            # not directly controllable (dealt with below)
            # Ensure that groups are cleaned up as a whole rather
            # than as individual resources.
            if (res_name in init_services
                    and res_name not in clones.values()
                    and res_name not in groups.values()
                    and not pcmk.crm_res_running(res_name)):
                # Just in case, cleanup the resources to ensure they get
                # started in case they failed for some unrelated reason.
                pcmk.commit('crm resource cleanup %s' % res_name)

        for cl_name in clones:
            # Always cleanup clones
            pcmk.commit('crm resource cleanup %s' % cl_name)

        for grp_name in groups:
            # Always cleanup groups
            pcmk.commit('crm resource cleanup %s' % grp_name)

        # All members of the cluster need to be registered before resources
        # that reference them can be created.
        if len(get_member_ready_nodes()) >= int(config('cluster_count')):
            log('Configuring any remote nodes', level=INFO)
            remote_resources = configure_pacemaker_remote_resources()
            stonith_resource = configure_pacemaker_remote_stonith_resource()
            resources.update(remote_resources)
            resources.update(stonith_resource)
            configure_resources_on_remotes(
                resources=resources,
                clones=clones,
                groups=groups)
        else:
            log('Deferring configuration of any remote nodes', level=INFO)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")

    # Inform peers that local configuration is complete and this member
    # is ready
    for rel_id in relation_ids('hanode'):
        relation_set(relation_id=rel_id, member_ready=True)