def service_stop(self, service_name):
    res_name = self.service_to_resource_map.get(service_name, None)
    if not res_name or not peer_units():
        super().service_stop(service_name)
        return
    # Stop the resource locally which will cause Pacemaker to stop the
    # respective service (force-stop operates locally).
    try:
        subprocess.run(
            [
                'crm_resource', '--wait', '--resource', res_name,
                '--force-stop'
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode == self._crm_no_such_resource_code():
            err_msg = e.stderr.decode('utf-8')
            if 'not found' in err_msg:
                # Fall back to stopping the service itself since:
                # 1. It could be that the resource hasn't been defined yet;
                # 2. This is a single-unit deployment without hacluster.
                super().service_stop(service_name)
            else:
                raise RuntimeError(
                    CRM_ERR_MSG.format(e.returncode, err_msg)) from e
        else:
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e
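This method (and the service_start/service_restart variants later on) references a message template and an exit-code helper that are not part of the snippet. A minimal sketch of what they might look like, assuming Pacemaker's documented tool exit codes (CRM_EX_ERROR = 1 for a generic error, CRM_EX_NOSUCH = 105 for a missing object); the names mirror those used in the examples but the bodies are illustrative only and the helper would live on the same class:

# Hedged sketch: these definitions are assumptions inferred from how the
# examples use them, not the charm's actual code.
CRM_EX_ERROR = 1     # generic error exit code from the crm shell
CRM_EX_NOSUCH = 105  # Pacemaker exit code for "object does not exist"
CRM_ERR_MSG = 'crm command exited with code {}: {}'

def _crm_no_such_resource_code(self):
    # Assumed helper: the exit code crm_resource returns when the named
    # resource does not exist.
    return CRM_EX_NOSUCH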
Example #2
def initialise_pki():
    """Create certs and keys required for token signing.

    Used for PKI and signing token revocation list.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    ensure_pki_cert_paths()
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = [
            'keystone-manage', 'pki_setup', '--keystone-user', 'keystone',
            '--keystone-group', 'keystone'
        ]
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root.
        ensure_permissions('/var/log/keystone',
                           user='keystone',
                           group='keystone',
                           perms=0o744)
        ensure_permissions('/var/log/keystone/keystone.log',
                           user='keystone',
                           group='keystone',
                           perms=0o644)

    ensure_pki_dir_permissions()
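The ensure_permissions helper used above is not shown in the snippet. A minimal sketch, assuming it simply applies ownership and mode to a path (the real charm helper may do more, e.g. recurse into directories):

import grp
import os
import pwd

def ensure_permissions(path, user, group, perms):
    # Hedged sketch of the assumed helper: chown the path to user:group
    # and chmod it to the requested mode. Illustrative only.
    uid = pwd.getpwnam(user).pw_uid
    gid = grp.getgrnam(group).gr_gid
    os.chown(path, uid, gid)
    os.chmod(path, perms)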
Example #3
def initialise_pki():
    """Create certs and keys required for token signing.

    Used for PKI and signing token revocation list.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    if CompareOpenStackReleases(os_release('keystone-common')) >= 'pike':
        # pike dropped support for PKI token; skip function
        return
    ensure_pki_cert_paths()
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        if snap_install_requested():
            cmd = ['/snap/bin/keystone-manage', 'pki_setup',
                   '--keystone-user', KEYSTONE_USER,
                   '--keystone-group', KEYSTONE_USER]
            _log_dir = '/var/snap/keystone/common/log'
        else:
            cmd = ['keystone-manage', 'pki_setup',
                   '--keystone-user', KEYSTONE_USER,
                   '--keystone-group', KEYSTONE_USER]
            _log_dir = '/var/log/keystone'
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root.
        ensure_permissions(_log_dir, user=KEYSTONE_USER,
                           group=KEYSTONE_USER, perms=0o744)
        ensure_permissions('{}/keystone.log'.format(_log_dir),
                           user=KEYSTONE_USER, group=KEYSTONE_USER,
                           perms=0o644)

    ensure_pki_dir_permissions()
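The release gate at the top of this variant relies on CompareOpenStackReleases from charmhelpers, which compares OpenStack codenames by their position in the known release series rather than as plain strings. A small usage sketch (the import path is the charmhelpers location I believe this comes from):

from charmhelpers.contrib.openstack.utils import CompareOpenStackReleases

# queens is newer than pike, so the PKI setup above is skipped on queens;
# ocata predates pike, so it would still run there.
print(CompareOpenStackReleases('queens') >= 'pike')  # True
print(CompareOpenStackReleases('ocata') >= 'pike')   # False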
def service_start(self, service_name):
    res_name = self.service_to_resource_map.get(service_name, None)
    if not res_name or not peer_units():
        super().service_start(service_name)
        return
    # Start a resource locally which will cause Pacemaker to start the
    # respective service. 'crm resource start' will not start the service
    # if the resource should not be running on this unit.
    try:
        subprocess.run(
            ['crm', '--wait', 'resource', 'start', res_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode == CRM_EX_ERROR:
            err_msg = e.stderr.decode('utf-8')
            if 'not found' in err_msg:
                return
            else:
                raise RuntimeError(
                    CRM_ERR_MSG.format(e.returncode, err_msg)) from e
        else:
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e
def config_changed():
    # if we are paused, delay doing any config changed hooks.  It is forced on
    # the resume.
    if is_unit_paused_set():
        return

    if config('prefer-ipv6'):
        assert_charm_supports_ipv6()

    hosts = get_cluster_hosts()
    clustered = len(hosts) > 1
    bootstrapped = is_bootstrapped()

    # NOTE: only configure the cluster if we have sufficient peers. This only
    # applies if min-cluster-size is provided and is used to avoid extraneous
    # configuration changes and premature bootstrapping as the cluster is
    # deployed.
    if is_sufficient_peers():
        try:
            # NOTE(jamespage): try with leadership election
            if is_leader():
                log("Leader unit - bootstrap required=%s" % (not bootstrapped),
                    DEBUG)
                render_config_restart_on_changed(clustered, hosts,
                                                 bootstrap=not bootstrapped)
            elif bootstrapped:
                log("Cluster is bootstrapped - configuring mysql on this node",
                    DEBUG)
                render_config_restart_on_changed(clustered, hosts)
            else:
                log("Not configuring", DEBUG)

        except NotImplementedError:
            # NOTE(jamespage): fallback to legacy behaviour.
            oldest = oldest_peer(peer_units())
            if oldest:
                log("Leader unit - bootstrap required=%s" % (not bootstrapped),
                    DEBUG)
                render_config_restart_on_changed(clustered, hosts,
                                                 bootstrap=not bootstrapped)
            elif bootstrapped:
                log("Cluster is bootstrapped - configuring mysql on this node",
                    DEBUG)
                render_config_restart_on_changed(clustered, hosts)
            else:
                log("Not configuring", DEBUG)

    # Notify any changes to the access network
    update_shared_db_rels()

    # (re)install pcmkr agent
    install_mysql_ocf()

    if relation_ids('ha'):
        # make sure all the HA resources are (re)created
        ha_relation_joined()

    if is_relation_made('nrpe-external-master'):
        update_nrpe_config()
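config_changed gates cluster configuration on is_sufficient_peers(), which is not shown here. A minimal sketch of the idea, assuming a min-cluster-size config option and counting this unit plus its peers (the real charm helper may differ in detail):

def is_sufficient_peers():
    # Hedged sketch: if min-cluster-size is unset, any number of units is
    # sufficient; otherwise require this unit plus its peers to reach it.
    min_size = config('min-cluster-size')
    if not min_size:
        return True
    return (len(peer_units()) + 1) >= int(min_size)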
    def __call__(self):
        if isinstance(self.external_ports, basestring):
            self.external_ports = [self.external_ports]
        if not self.external_ports or not https():
            return {}

        self.configure_cert()
        self.enable_modules()

        ctxt = {
            "namespace": self.service_namespace,
            "private_address": unit_get("private-address"),
            "endpoints": []
        }
        for ext_port in self.external_ports:
            if peer_units() or is_clustered():
                int_port = determine_haproxy_port(ext_port)
            else:
                int_port = determine_api_port(ext_port)
            portmap = (int(ext_port), int(int_port))
            ctxt["endpoints"].append(portmap)
        return ctxt
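The port mapping above leans on determine_haproxy_port and determine_api_port from charmhelpers. As a rough, hedged illustration of the convention these charms follow, each fronting tier (haproxy when clustered, apache when TLS terminates) listens at an offset of 10 below the public port; the real helpers also take clustering and https state into account:

# Illustrative only; not the charmhelpers implementation.
ext_port = 443
int_port = ext_port - 10                 # e.g. the back end listens on 433
portmap = (int(ext_port), int(int_port))
# ctxt['endpoints'] would then contain [(443, 433)]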
Example #7
def initialise_pki():
    """Create certs and keys required for PKI token signing.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = ['keystone-manage', 'pki_setup', '--keystone-user', 'keystone',
               '--keystone-group', 'keystone']
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root.
        ensure_permissions('/var/log/keystone', user='keystone',
                           group='keystone', perms=0o744)
        ensure_permissions('/var/log/keystone/keystone.log', user='keystone',
                           group='keystone', perms=0o644)

    ensure_pki_dir_permissions()
def upgrade():
    check_bootstrap = False
    try:
        if is_leader():
            check_bootstrap = True
    except NotImplementedError:
        # Leadership election is not supported on this Juju version; fall
        # back to treating the oldest peer as the leader.
        if oldest_peer(peer_units()):
            check_bootstrap = True

    if check_bootstrap and not is_bootstrapped() and is_sufficient_peers():
        # If this is the leader but we have not yet broadcast the cluster uuid
        # then do so now.
        wsrep_ready = get_wsrep_value('wsrep_ready') or ""
        if wsrep_ready.lower() in ['on', 'ready']:
            cluster_state_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
            if cluster_state_uuid:
                mark_seeded()
                notify_bootstrapped(cluster_uuid=cluster_state_uuid)

    config_changed()
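upgrade() reads Galera state through get_wsrep_value(), which is not shown. A rough sketch of the idea, assuming the value is read from the local MySQL server's wsrep status variables (credentials and error handling are simplified, so treat this as illustrative only):

import subprocess

def get_wsrep_value(key):
    # Hedged sketch: query the local server for a wsrep_* status variable
    # and return its value, or None if it cannot be read.
    try:
        out = subprocess.check_output(
            ['mysql', '-uroot', '-N', '-B', '-e',
             "SHOW GLOBAL STATUS LIKE '%s'" % key]).decode('utf-8')
    except subprocess.CalledProcessError:
        return None
    parts = out.split()
    return parts[1] if len(parts) > 1 else None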
Example #9
def initialise_pki():
    """Create certs and keys required for token signing.

    Used for PKI and signing token revocation list.

    NOTE: keystone.conf [signing] section must be up-to-date prior to
          executing this.
    """
    ensure_pki_cert_paths()
    if not peer_units() or is_ssl_cert_master():
        log("Ensuring PKI token certs created", level=DEBUG)
        cmd = ["keystone-manage", "pki_setup", "--keystone-user", "keystone", "--keystone-group", "keystone"]
        check_call(cmd)

        # Ensure logfile has keystone perms since we may have just created it
        # with root.
        ensure_permissions("/var/log/keystone", user="******", group="keystone", perms=0o744)
        ensure_permissions("/var/log/keystone/keystone.log", user="******", group="keystone", perms=0o644)

    ensure_pki_dir_permissions()
def cluster_sync_rings(peers_only=False, builders_only=False):
    """Notify peer relations that they should stop their proxy services.

    Peer units are then expected to do a relation_set with
    stop-proxy-service-ack set to the request value. Once all peers have
    responded, the leader will send out a notification to all relations
    that rings are available for sync.

    If peers_only is True, only peer units will be synced. This is typically
    used when only builder files have been changed.

    This should only be called by the leader unit.
    """
    if not is_elected_leader(SWIFT_HA_RES):
        # Only the leader can do this.
        return

    if not peer_units():
        # If we have no peer units just go ahead and broadcast to storage
        # relations. If we have been instructed to only broadcast to peers this
        # should do nothing.
        broker_token = get_broker_token()
        broadcast_rings_available(broker_token, peers=False,
                                  storage=not peers_only)
        return
    elif builders_only:
        # No need to stop proxies if only syncing builders between peers.
        broker_token = get_broker_token()
        broadcast_rings_available(broker_token, storage=False,
                                  builders_only=builders_only)
        return

    rel_ids = relation_ids('cluster')
    trigger = str(uuid.uuid4())

    log("Sending request to stop proxy service to all peers (%s)" % (trigger),
        level=INFO)
    rq = SwiftProxyClusterRPC().stop_proxy_request(peers_only)
    for rid in rel_ids:
        relation_set(relation_id=rid, relation_settings=rq)
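The docstring above describes peers acknowledging with stop-proxy-service-ack before the leader broadcasts, but that acknowledgement check is not part of this snippet. A rough sketch of what it could look like, assuming the ack must echo the trigger sent in the request (the function name and details are illustrative, not the charm's actual code):

def all_peers_stopped(trigger):
    # Hedged sketch: every peer on the cluster relation must have echoed
    # the trigger back in stop-proxy-service-ack before rings are synced.
    for rid in relation_ids('cluster'):
        for unit in related_units(rid):
            ack = relation_get(attribute='stop-proxy-service-ack',
                               rid=rid, unit=unit)
            if ack != trigger:
                return False
    return True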
Example #11
    def __call__(self):
        if isinstance(self.external_ports, basestring):
            self.external_ports = [self.external_ports]
        if (not self.external_ports or not https()):
            return {}

        self.configure_cert()
        self.enable_modules()

        ctxt = {
            'namespace': self.service_namespace,
            'private_address': unit_get('private-address'),
            'endpoints': []
        }
        for ext_port in self.external_ports:
            if peer_units() or is_clustered():
                int_port = determine_haproxy_port(ext_port)
            else:
                int_port = determine_api_port(ext_port)
            portmap = (int(ext_port), int(int_port))
            ctxt['endpoints'].append(portmap)
        return ctxt
def cluster_sync_rings(peers_only=False, builders_only=False, token=None):
    """Notify peer relations that they should stop their proxy services.

    Peer units are then expected to do a relation_set with
    stop-proxy-service-ack set to the request value. Once all peers have
    responded, the leader will send out a notification to all relations
    that rings are available for sync.

    If peers_only is True, only peer units will be synced. This is typically
    used when only builder files have been changed.

    This should only be called by the leader unit.
    """
    if not is_elected_leader(SWIFT_HA_RES):
        # Only the leader can do this.
        return

    if not peer_units():
        # If we have no peer units just go ahead and broadcast to storage
        # relations. If we have been instructed to only broadcast to peers this
        # should do nothing.
        broadcast_rings_available(broker_token=str(uuid.uuid4()),
                                  storage=not peers_only)
        return
    elif builders_only:
        if not token:
            token = str(uuid.uuid4())

        # No need to stop proxies if only syncing builders between peers.
        broadcast_rings_available(storage=False,
                                  builders_only=True,
                                  broker_token=token)
        return

    log("Sending stop proxy service request to all peers", level=INFO)
    rq = SwiftProxyClusterRPC().stop_proxy_request(peers_only, token=token)
    for rid in relation_ids('cluster'):
        relation_set(relation_id=rid, relation_settings=rq)
def service_restart(self, service_name):
    res_name = self.service_to_resource_map.get(service_name, None)
    if not res_name or not peer_units():
        super().service_restart(service_name)
        return
    # crm_resource does not have a --force-restart command to do a
    # local restart, however, --node can be specified to limit the
    # scope of a restart operation to the local node. The node name
    # is the hostname present in the UTS namespace unless higher
    # precedence overrides are specified in corosync.conf.
    try:
        subprocess.run(
            [
                'crm_resource', '--wait', '--resource', res_name,
                '--restart', '--node',
                socket.gethostname()
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        if e.returncode == self._crm_no_such_resource_code():
            err_msg = e.stderr.decode('utf-8')
            if 'not found' in err_msg or 'is not running on' in err_msg:
                # crm_resource --restart returns CRM_EX_NOSUCH when a
                # resource is not running on the specified --node. Assume
                # it is running somewhere else in the cluster and that its
                # lifetime is managed by Pacemaker (i.e. don't attempt to
                # forcefully start it locally).
                return
            else:
                raise RuntimeError(
                    CRM_ERR_MSG.format(e.returncode, err_msg)) from e
        else:
            raise RuntimeError(CRM_ERR_MSG.format(e.returncode, '')) from e
Example #14
def test_peer_units(self):
    '''It lists all peer units for cluster relation'''
    peers = ['peer_node/1', 'peer_node/2']
    self.relation_ids.return_value = ['cluster:0']
    self.relation_list.return_value = peers
    self.assertEqual(peers, cluster_utils.peer_units())
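The test fixes the return values of relation_ids and relation_list, which implies peer_units() simply flattens the unit lists of every 'cluster' relation. A minimal sketch consistent with that behaviour (the real charmhelpers implementation may differ slightly):

def peer_units(peer_relation='cluster'):
    # Hedged sketch: collect every unit on every peer ('cluster') relation.
    peers = []
    for r_id in relation_ids(peer_relation):
        peers.extend(relation_list(r_id))
    return peers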
Example #15
def ha_relation_changed():
    # Check that we are related to a principal and that
    # it has already provided the required corosync configuration
    if not get_corosync_conf():
        log('Unable to configure corosync right now, deferring configuration',
            level=INFO)
        return

    if relation_ids('hanode'):
        log('Ready to form cluster - informing peers', level=DEBUG)
        relation_set(relation_id=relation_ids('hanode')[0], ready=True)
    else:
        log('Ready to form cluster, but not related to peers just yet',
            level=INFO)
        return

    # Check that there's enough nodes in order to perform the
    # configuration of the HA cluster
    if len(get_cluster_nodes()) < int(config('cluster_count')):
        log('Not enough nodes in cluster, deferring configuration', level=INFO)
        return

    relids = relation_ids('ha')
    if len(relids) == 1:  # Should only ever be one of these
        # Obtain relation information
        relid = relids[0]
        units = related_units(relid)
        if len(units) < 1:
            log('No principal unit found, deferring configuration', level=INFO)
            return

        unit = units[0]
        log('Parsing cluster configuration using rid: %s, unit: %s' %
            (relid, unit),
            level=DEBUG)
        resources = parse_data(relid, unit, 'resources')
        delete_resources = parse_data(relid, unit, 'delete_resources')
        resource_params = parse_data(relid, unit, 'resource_params')
        groups = parse_data(relid, unit, 'groups')
        ms = parse_data(relid, unit, 'ms')
        orders = parse_data(relid, unit, 'orders')
        colocations = parse_data(relid, unit, 'colocations')
        clones = parse_data(relid, unit, 'clones')
        locations = parse_data(relid, unit, 'locations')
        init_services = parse_data(relid, unit, 'init_services')
    else:
        log('Related to %s ha services' % (len(relids)), level=DEBUG)
        return

    if True in [
            ra.startswith('ocf:openstack') for ra in resources.itervalues()
    ]:
        apt_install('openstack-resource-agents')
    if True in [ra.startswith('ocf:ceph') for ra in resources.itervalues()]:
        apt_install('ceph-resource-agents')

    if True in [ra.startswith('ocf:maas') for ra in resources.values()]:
        if validate_dns_ha():
            log('Setting up access to MAAS API', level=INFO)
            setup_maas_api()
            # Update resource_parms for DNS resources to include MAAS URL and
            # credentials
            for resource in resource_params.keys():
                if resource.endswith("_hostname"):
                    resource_params[resource] += (
                        ' maas_url="{}" maas_credentials="{}"'
                        ''.format(config('maas_url'),
                                  config('maas_credentials')))
        else:
            msg = ("DNS HA is requested but maas_url "
                   "or maas_credentials are not set")
            status_set('blocked', msg)
            raise ValueError(msg)

    # NOTE: this should be removed in 15.04 cycle as corosync
    # configuration should be set directly on subordinate
    configure_corosync()
    pcmk.wait_for_pcmk()
    configure_cluster_global()
    configure_monitor_host()
    configure_stonith()

    # Only configure the cluster resources
    # from the oldest peer unit.
    if oldest_peer(peer_units()):
        log('Deleting Resources: %s' % (delete_resources), level=DEBUG)
        for res_name in delete_resources:
            if pcmk.crm_opt_exists(res_name):
                if ocf_file_exists(res_name, resources):
                    log('Stopping and deleting resource %s' % res_name,
                        level=DEBUG)
                    if pcmk.crm_res_running(res_name):
                        pcmk.commit('crm -w -F resource stop %s' % res_name)
                else:
                    log('Cleaning up and deleting resource %s' % res_name,
                        level=DEBUG)
                    pcmk.commit('crm resource cleanup %s' % res_name)
                # Daemon process may still be running after the upgrade.
                kill_legacy_ocf_daemon_process(res_name)
                pcmk.commit('crm -w -F configure delete %s' % res_name)

        log('Configuring Resources: %s' % (resources), level=DEBUG)
        for res_name, res_type in resources.iteritems():
            # disable the service we are going to put in HA
            if res_type.split(':')[0] == "lsb":
                disable_lsb_services(res_type.split(':')[1])
                if service_running(res_type.split(':')[1]):
                    service_stop(res_type.split(':')[1])
            elif (len(init_services) != 0 and res_name in init_services
                  and init_services[res_name]):
                disable_upstart_services(init_services[res_name])
                if service_running(init_services[res_name]):
                    service_stop(init_services[res_name])
            # Put the services in HA, if not already done so
            # if not pcmk.is_resource_present(res_name):
            if not pcmk.crm_opt_exists(res_name):
                if res_name not in resource_params:
                    cmd = 'crm -w -F configure primitive %s %s' % (res_name,
                                                                   res_type)
                else:
                    cmd = ('crm -w -F configure primitive %s %s %s' %
                           (res_name, res_type, resource_params[res_name]))

                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)
                if config('monitor_host'):
                    cmd = ('crm -F configure location Ping-%s %s rule '
                           '-inf: pingd lte 0' % (res_name, res_name))
                    pcmk.commit(cmd)

        log('Configuring Groups: %s' % (groups), level=DEBUG)
        for grp_name, grp_params in groups.iteritems():
            if not pcmk.crm_opt_exists(grp_name):
                cmd = ('crm -w -F configure group %s %s' %
                       (grp_name, grp_params))
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        log('Configuring Master/Slave (ms): %s' % (ms), level=DEBUG)
        for ms_name, ms_params in ms.iteritems():
            if not pcmk.crm_opt_exists(ms_name):
                cmd = 'crm -w -F configure ms %s %s' % (ms_name, ms_params)
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        log('Configuring Orders: %s' % (orders), level=DEBUG)
        for ord_name, ord_params in orders.iteritems():
            if not pcmk.crm_opt_exists(ord_name):
                cmd = 'crm -w -F configure order %s %s' % (ord_name,
                                                           ord_params)
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        log('Configuring Colocations: %s' % colocations, level=DEBUG)
        for col_name, col_params in colocations.iteritems():
            if not pcmk.crm_opt_exists(col_name):
                cmd = 'crm -w -F configure colocation %s %s' % (col_name,
                                                                col_params)
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        log('Configuring Clones: %s' % clones, level=DEBUG)
        for cln_name, cln_params in clones.iteritems():
            if not pcmk.crm_opt_exists(cln_name):
                cmd = 'crm -w -F configure clone %s %s' % (cln_name,
                                                           cln_params)
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        log('Configuring Locations: %s' % locations, level=DEBUG)
        for loc_name, loc_params in locations.iteritems():
            if not pcmk.crm_opt_exists(loc_name):
                cmd = 'crm -w -F configure location %s %s' % (loc_name,
                                                              loc_params)
                pcmk.commit(cmd)
                log('%s' % cmd, level=DEBUG)

        for res_name, res_type in resources.iteritems():
            if len(init_services) != 0 and res_name in init_services:
                # Checks that the resources are running and started.
                # Ensure that clones are excluded as the resource is
                # not directly controllable (dealt with below)
                # Ensure that groups are cleaned up as a whole rather
                # than as individual resources.
                if (res_name not in clones.values()
                        and res_name not in groups.values()
                        and not pcmk.crm_res_running(res_name)):
                    # Just in case, cleanup the resources to ensure they get
                    # started in case they failed for some unrelated reason.
                    cmd = 'crm resource cleanup %s' % res_name
                    pcmk.commit(cmd)

        for cl_name in clones:
            # Always cleanup clones
            cmd = 'crm resource cleanup %s' % cl_name
            pcmk.commit(cmd)

        for grp_name in groups:
            # Always cleanup groups
            cmd = 'crm resource cleanup %s' % grp_name
            pcmk.commit(cmd)

    for rel_id in relation_ids('ha'):
        relation_set(relation_id=rel_id, clustered="yes")
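ha_relation_changed builds all of its Pacemaker configuration from parse_data(), which is not shown in this snippet. A rough sketch of the idea, assuming the principal charm serialises each key on the 'ha' relation and an empty dict is returned when the key is missing (the hacluster charm's real helper also handles other encodings, so this is illustrative only):

import json

def parse_data(relid, unit, key):
    # Hedged sketch: fetch a relation setting and decode it, returning an
    # empty dict when the principal has not provided the key.
    raw = relation_get(attribute=key, rid=relid, unit=unit)
    if not raw:
        return {}
    return json.loads(raw)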