Ejemplo n.º 1
0
def import_target(device_type,
                  path,
                  pacemaker_ha_operation,
                  validate_importable=False):
    """
    Import the device at ``path`` through a BlockDevice of the given type.

    The import is first attempted unforced. If that reports an error mentioning
    '-f' and the call was made on behalf of pacemaker, it is retried forced.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: truthy when the import is requested by pacemaker; this is
               the only case in which a forced import is attempted.
    :param validate_importable: when True, a successful import is immediately followed by an
               export, so that the device is only checked for importability.
    :return: the value of agent_ok_or_error() for the last error seen (None on success)
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    # Only escalate to a forced import when the failure mentions '-f' and pacemaker asked for it.
    if error and '-f' in error and pacemaker_ha_operation:
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)
    elif (error is None) and (validate_importable is True):
        # Validation only: put the device back the way we found it.
        error = blockdevice.export()

        if error:
            console_log.error("Error exporting pool: '%s'" % error)

    return agent_ok_or_error(error)
Ejemplo n.º 2
0
def _configure_rsyslog(destination):
    """
    Rewrite /etc/rsyslog.conf so that it forwards to ``destination`` (or not at all).

    Any section previously written by this function (delimited by a pair of
    "# added by chroma-agent" marker lines) is dropped. When ``destination`` is not the
    empty string a fresh forwarding section is appended. The new content is written to a
    temporary file in /etc and renamed over the real file, after which rsyslog is
    reloaded (and restarted if the reload reports an error).

    :param destination: host to forward syslog to, or "" to remove forwarding
    :return: the value of agent_ok_or_error() for the reload/restart outcome
    """
    from tempfile import mkstemp

    marker = "# added by chroma-agent\n"
    conf_path = '/etc/rsyslog.conf'

    tmp_f, tmp_name = mkstemp(dir='/etc')
    installed = False
    try:
        # Both files are managed by 'with' so they are closed even if copying fails.
        with os.fdopen(tmp_f, 'w') as tmp_file:
            with open(conf_path, 'r') as f:
                skip = False
                for line in f:
                    if line == marker:
                        # Markers come in pairs; each one toggles whether we are inside
                        # the agent-managed section. The markers themselves are dropped.
                        skip = not skip
                        continue
                    if not skip:
                        tmp_file.write(line)

            if destination != "":
                tmp_file.write("# added by chroma-agent\n"
                               "$PreserveFQDN on\n"
                               "*.* @@%s:%s;RSYSLOG_ForwardFormat\n"
                               "# added by chroma-agent\n" % (destination, SYSLOG_PORT))

        # mkstemp creates the file owner-only; rsyslog.conf is world readable.
        os.chmod(tmp_name, 0o644)
        os.rename(tmp_name, conf_path)
        installed = True
    finally:
        # Do not leave stray temporary files in /etc if anything above failed.
        if not installed and os.path.exists(tmp_name):
            os.unlink(tmp_name)

    # signal the process and restart if the signal fails.
    error = rsyslog_service.reload() and rsyslog_service.restart()

    return agent_ok_or_error(error)
Ejemplo n.º 3
0
def unconfigure_corosync2(host_fqdn, mcast_port):
    """
    Unconfigure the corosync application.

    For corosync2 pcsd is left alone: corosync is disabled, this host is removed from the
    cluster with ``pcs cluster node remove`` and the pcs/corosync firewall rules are removed.
    (The service should already have been stopped in a state transition.)

    Note that pcs cluster commands handle editing and removal of the corosync.conf file

    :param host_fqdn: name of the node to remove from the cluster
    :param mcast_port: corosync multicast port whose udp firewall rule is removed
    Return: Value using simple return protocol
    """
    disable_error = corosync_service.disable()
    if disable_error:
        return agent_error(disable_error)

    # Take the membership snapshot before the remove command, which removes the conf file.
    cluster_nodes = _nodes_in_cluster()

    result = AgentShell.run(["pcs", "--force", "cluster", "node", "remove", host_fqdn])

    if result.rc != 0:
        stderr = result.stderr

        if "No such file or directory" in stderr:
            # A missing configuration file still counts as success, but is worth a warning.
            console_log.warning(stderr)
        else:
            # "Unable to update any nodes" is expected only when we were the last node.
            expected_last_node_failure = ("Error: Unable to update any nodes" in stderr
                                          and len(cluster_nodes) == 1)

            if not expected_last_node_failure:
                return agent_error(stderr)

    error = firewall_control.remove_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True)
    if not error:
        error = firewall_control.remove_rule(mcast_port, "udp", "corosync", persist=True)

    return agent_ok_or_error(error)
Ejemplo n.º 4
0
def _configure_pacemaker():
    '''
    Apply the (global) pacemaker configuration, but only from the node that is the DC.

    Polls every 10 seconds until the configuration reports itself as configured or
    PACEMAKER_CONFIGURE_TIMEOUT seconds have passed. On each pass the DC runs
    _do_configure_pacemaker(); other nodes just log and wait.

    :return: agent_ok if no error else returns an agent_error
    '''
    pacemaker_config = PacemakerConfig()
    deadline = time.time() + PACEMAKER_CONFIGURE_TIMEOUT

    error = None

    while (pacemaker_config.configured is False) and (time.time() < deadline):
        if not pacemaker_config.is_dc:
            daemon_log.info(
                'Not configuring (global) pacemaker configuration because I am not the DC'
            )
        else:
            daemon_log.info(
                'Configuring (global) pacemaker configuration because I am the DC'
            )

            error = _do_configure_pacemaker(pacemaker_config)

            # A failed configure attempt is fatal; do not keep polling.
            if error:
                return agent_error(error)

        time.sleep(10)

    # Leaving the loop still unconfigured means the timeout expired.
    if pacemaker_config.configured is False:
        error = 'Failed to configure (global) pacemaker configuration dc=%s' % pacemaker_config.dc

    return agent_ok_or_error(error)
Ejemplo n.º 5
0
def unload_lnet():
    '''
    Remove the 'lnet' module via _rmmod and report the outcome.

    Per the original author's note this also covers modules that depend on lnet, and
    lnet must already have been stopped before this is called.

    :return: the value of agent_ok_or_error() for the _rmmod result
    '''
    rmmod_error = _rmmod('lnet')

    return agent_ok_or_error(rmmod_error)
Ejemplo n.º 6
0
def check_block_device(path, device_type):
    """
    Precursor to formatting a device: look up the filesystem information of the device.

    :param path: Path to a block device
    :param device_type: The type of device the path references
    :return: the value of agent_ok_or_error() applied to the BlockDevice's ``filesystem_info``
    """
    blockdevice = BlockDevice(device_type, path)
    filesystem_info = blockdevice.filesystem_info

    return agent_ok_or_error(filesystem_info)
Ejemplo n.º 7
0
def unload_lnet():
    """
    Unload the lnet modules by running ``lustre_rmmod``.

    Per the original author's note this includes modules that depend on lnet, and lnet
    must already have been stopped before this is called.

    :return: the value of agent_ok_or_error() for the command's canned error message
    """
    rmmod_error = AgentShell.run_canned_error_message(["lustre_rmmod"])

    return agent_ok_or_error(rmmod_error)
def stop_lnet():
    '''
    Place lnet into the 'down' state.

    Modules depending on lnet are removed first (ksocklnd and ko2iblnd are passed as
    exceptions to _rmmod_deps); only if that reports no error is ``lctl net down`` run.

    :return: the value of agent_ok_or_error() for the first error encountered, if any
    '''
    console_log.info("Stopping LNet")

    error = _rmmod_deps("lnet", excpt=["ksocklnd", "ko2iblnd"])
    if not error:
        error = AgentShell.run_canned_error_message(["lctl", "net", "down"])

    return agent_ok_or_error(error)
def start_lnet():
    '''
    Place lnet into the 'up' state.

    Runs ``lctl net up`` and, if that reports no error, ``modprobe lustre``.

    :return: the value of agent_ok_or_error() for the first error encountered, if any
    '''
    console_log.info("Starting LNet")

    error = AgentShell.run_canned_error_message(["lctl", "net", "up"])

    if not error:
        # modprobe lustre is a hack for HYD-1263 - Fix or work around LU-1279 - failure trying
        # to mount; should be removed when LU-1279 is fixed
        error = AgentShell.run_canned_error_message(["modprobe", "lustre"])

    return agent_ok_or_error(error)
Ejemplo n.º 10
0
def configure_ntp(ntp_server):
    """
    Add ``ntp_server`` to the ntp configuration and restart the ntp service.

    :param ntp_server: server to pass to NTPConfig().add()
    :return: Value using simple return protocol
    """
    config_error = NTPConfig().add(ntp_server)

    if not config_error:
        return agent_ok_or_error(ntp_service.restart())

    # NOTE(review): the configuration error is returned as-is rather than through
    # agent_error(); presumably NTPConfig.add already returns a protocol value - confirm.
    return config_error
Ejemplo n.º 11
0
def unconfigure_fencing():
    """
    Unconfigure fencing, but only when this is the only node left in the cluster.

    :return: 0 when nothing is done, otherwise the value of agent_ok_or_error() for
             the _unconfigure_fencing() result
    """
    # Pacemaker has to be up to tell us the cluster size; if it is not, or if other
    # nodes remain in the cluster, there is nothing for us to do.
    # NOTE(review): these early exits return a bare 0 rather than an agent result -
    # confirm callers expect that.
    if not _pacemaker_running() or _get_cluster_size() > 1:
        return 0

    unconfigure_error = _unconfigure_fencing()

    return agent_ok_or_error(unconfigure_error)
Ejemplo n.º 12
0
def change_mcast_port(old_mcast_port, new_mcast_port):
    """
    Update corosync configuration with a new mcast_port on this managed server (not all the
    nodes in the cluster).

    The udp firewall rule for the old port is removed, a rule for the new port is added and
    the ``mcastport:`` entries in COROSYNC_CONF_PATH are rewritten in place with sed (keeping
    a .bak copy). Each step runs only if the previous one reported no error. Per the original
    author, corosync polls its configuration file and will pick up the new value.

    :param old_mcast_port: port whose firewall rule is removed
    :param new_mcast_port: port to open and to write into the configuration file
    Return: Value using simple return protocol
    """
    sed_expression = 's/mcastport:.*/mcastport: %s/g' % new_mcast_port
    file_edit_args = ['sed', '-i.bak', sed_expression, COROSYNC_CONF_PATH]

    error = firewall_control.remove_rule(old_mcast_port, "udp", "corosync", persist=True)

    if not error:
        error = firewall_control.add_rule(new_mcast_port, "udp", "corosync", persist=True)

    if not error:
        error = AgentShell.run_canned_error_message(file_edit_args)

    return agent_ok_or_error(error)
Ejemplo n.º 13
0
def stop_lnet():
    """
    Place lnet into the 'down' state.

    Runs ``lustre_rmmod ptlrpc`` and, if that reports no error,
    ``lnetctl lnet unconfigure``.

    :return: the value of agent_ok_or_error() for the first error encountered, if any
    """
    console_log.info("Stopping LNet")

    error = AgentShell.run_canned_error_message(["lustre_rmmod", "ptlrpc"])

    if not error:
        error = AgentShell.run_canned_error_message(["lnetctl", "lnet", "unconfigure"])

    return agent_ok_or_error(error)
Ejemplo n.º 14
0
def configure_corosync2_stage_2(ring0_name, ring1_name, new_node_fqdn, mcast_port, pcs_password, create_cluster):
    """Authenticate a node with pcs and then either create the cluster or add the node to it.

    Note: "The pcs cluster setup command will automatically configure two_node: 1 in
    corosync.conf, so a two-node cluster will "just work". If you are using a different cluster
    shell, you will have to configure corosync.conf appropriately yourself." Therefore
    no-quorum-policy does not have to be set when setting up cluster with pcs.

    :param ring0_name: interface name used for corosync ring 0
    :param ring1_name: interface name used for corosync ring 1
    :param new_node_fqdn: node to authenticate and to set up / add
    :param mcast_port: multicast port given to both ring interfaces
    :param pcs_password: password for PCS_USER, passed to ``pcs cluster auth``
    :param create_cluster: truthy to run ``pcs cluster setup``, otherwise ``pcs cluster node add``
    :return: the value of agent_ok_or_error() for the first error encountered, if any
    """

    ring0_info = InterfaceInfo(CorosyncRingInterface(name=ring0_name, ringnumber=0,
                                                     mcastport=mcast_port), None, None)
    ring1_info = InterfaceInfo(CorosyncRingInterface(name=ring1_name, ringnumber=1,
                                                     mcastport=mcast_port), None, None)

    ring0 = ring0_info.corosync_iface
    ring1 = ring1_info.corosync_iface

    config_params = {
        'token': '17000',
        'fail_recv_const': '10',
        'transport': 'udp',
        'rrpmode': 'passive',
        'addr0': ring0.bindnetaddr,
        'addr1': ring1.bindnetaddr,
        'mcast0': ring0.mcastaddr,
        'mcast1': ring1.mcastaddr,
        'mcastport0': ring0.mcastport,
        'mcastport1': ring1.mcastport
    }

    # authenticate nodes in cluster
    authenticate_nodes_in_cluster_command = ['pcs', 'cluster', 'auth', new_node_fqdn,
                                             '-u', PCS_USER, '-p', pcs_password]

    # Setting the cluster up through pcs produces corosync.conf for us (rather than writing it
    # from a template). The cluster is not started here as services are managed independently.
    if not create_cluster:
        cluster_setup_command = ['pcs', 'cluster', 'node', 'add', new_node_fqdn]
    else:
        cluster_setup_command = ['pcs', 'cluster', 'setup', '--name', PCS_CLUSTER_NAME, '--force',
                                 new_node_fqdn]

        # Options are emitted in this fixed order, each as "--<name> <value>".
        option_order = ('transport', 'rrpmode', 'addr0', 'mcast0', 'mcastport0', 'addr1', 'mcast1',
                        'mcastport1', 'token', 'fail_recv_const')

        for option in option_order:
            cluster_setup_command.append("--" + option)
            cluster_setup_command.append(str(config_params[option]))

    error = AgentShell.run_canned_error_message(authenticate_nodes_in_cluster_command)

    if not error:
        error = AgentShell.run_canned_error_message(cluster_setup_command)

    return agent_ok_or_error(error)
Ejemplo n.º 15
0
def unconfigure_pacemaker():
    """
    Stop and disable pacemaker, erasing the CIB first when this is the last node.

    :return: agent_result_ok when pacemaker is not running, otherwise the value of
             agent_ok_or_error() for the stop/disable outcome
    """
    # Pacemaker has to be up to answer the cluster size question; skip everything if not.
    if not _pacemaker_running():
        return agent_result_ok

    cluster_size = _get_cluster_size()

    if cluster_size < 2:
        # last node, nuke the CIB
        cibadmin(["-f", "-E"])

    error = pacemaker_service.stop()

    if not error:
        error = pacemaker_service.disable()

    return agent_ok_or_error(error)
Ejemplo n.º 16
0
def configure_corosync2_stage_1(mcast_port, pcs_password):
    """
    First stage of corosync2 configuration on this node.

    Sets the password of PCS_USER, opens the corosync (udp) and pcs (tcp) firewall ports,
    starts pcsd and enables corosync and pcsd. Each step runs only if the previous one
    reported no error.

    :param mcast_port: corosync multicast port to open in the firewall
    :param pcs_password: password to set for PCS_USER
    :return: the value of agent_ok_or_error() for the first error encountered, if any
    """
    # shlex.quote on Python 3; pipes.quote is the Python 2 equivalent.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    # need to use user "hacluster" which is created on install of "pcs" package,
    # WARNING: clear text password
    # The password is shell-quoted: it is interpolated into a bash command line, so without
    # quoting any whitespace or shell metacharacter in it would be interpreted by bash.
    set_password_command = ['bash', '-c', 'echo %s | passwd --stdin %s' %
                                          (shell_quote(pcs_password),
                                           PCS_USER)]

    return agent_ok_or_error(AgentShell.run_canned_error_message(set_password_command) or
                             firewall_control.add_rule(mcast_port, "udp", "corosync", persist=True) or
                             firewall_control.add_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True) or
                             pcsd_service.start() or
                             corosync_service.enable() or
                             pcsd_service.enable())
Ejemplo n.º 17
0
def configure_ntp(ntp_server):
    """
    Add ``ntp_server`` to the ntp configuration, switch from chrony to ntp and restart ntp.

    :param ntp_server: server to pass to NTPConfig().add()
    :return: Value using simple return protocol
    """
    config_error = NTPConfig().add(ntp_server)

    if config_error:
        # NOTE(review): returned as-is rather than through agent_error(); presumably
        # NTPConfig.add already returns a protocol value - confirm.
        return config_error

    # The results of these three calls are not checked; only the restart outcome is reported.
    chrony_service.stop(validate_time=0.5)
    chrony_service.disable()
    ntp_service.enable()

    restart_error = ntp_service.restart()

    return agent_ok_or_error(restart_error)
Ejemplo n.º 18
0
def configure_corosync(ring0_name, ring1_name, old_mcast_port, new_mcast_port):
    """
    Write /etc/corosync/corosync.conf for the two rings, update the firewall and enable corosync.

    No IP address information is required; only the ring interface names and the negotiated
    multicast port are passed in.

    :param ring0_name: interface name used for corosync ring 0
    :param ring1_name: interface name used for corosync ring 1
    :param old_mcast_port: None if we are configuring corosync for the first-time, present if changing mcast port
    :param new_mcast_port: desired corosync multicast port as configured by user
    :return: Value using simple return protocol
    """

    # Ring numbers follow the position of the name: ring0_name -> 0, ring1_name -> 1.
    interfaces = [
        InterfaceInfo(
            CorosyncRingInterface(name=ring_name,
                                  ringnumber=ring_number,
                                  mcastport=new_mcast_port),
            None,
            None,
        )
        for ring_number, ring_name in enumerate((ring0_name, ring1_name))
    ]

    corosync_ifaces = [info.corosync_iface for info in interfaces]

    write_config_to_file("/etc/corosync/corosync.conf", render_config(corosync_ifaces))

    # When the port is being changed, the rule for the previous port has to go first.
    if old_mcast_port is not None:
        remove_error = firewall_control.remove_rule(old_mcast_port,
                                                    "udp",
                                                    "corosync",
                                                    persist=True)

        if remove_error:
            return agent_error(remove_error)

    error = firewall_control.add_rule(new_mcast_port, "udp", "corosync", persist=True)

    if not error:
        error = corosync_service.enable()

    return agent_ok_or_error(error)
Ejemplo n.º 19
0
def export_target(device_type, path):
    """
    Export the device at ``path`` through a BlockDevice of the given type.

    Per the original author, export only makes sense for some device types: for example a
    jbod scsi disk does not have the concept of export whilst zfs does.

    :param path: path of device to export
    :param device_type: the type of device to export
    :return: the value of agent_ok_or_error() for the export result (None on success)
    """

    export_error = BlockDevice(device_type, path).export()

    if export_error:
        console_log.error("Error exporting pool: '%s'" % export_error)

    return agent_ok_or_error(export_error)
Ejemplo n.º 20
0
def configure_corosync2_stage_1(mcast_port, pcs_password, fqdn=None):
    """
    First stage of corosync2 configuration on this node.

    Optionally sets the hostname, then sets the password of PCS_USER, opens the corosync
    (udp) and pcs (tcp) firewall ports, starts pcsd and enables corosync and pcsd. Each step
    runs only if the previous one reported no error.

    :param mcast_port: corosync multicast port to open in the firewall
    :param pcs_password: password to set for PCS_USER
    :param fqdn: when not None, passed to ``hostnamectl set-hostname`` before anything else
    :return: the value of agent_error() if setting the hostname fails, otherwise the value of
             agent_ok_or_error() for the first error encountered, if any
    """
    # shlex.quote on Python 3; pipes.quote is the Python 2 equivalent.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    # need to use user "hacluster" which is created on install of "pcs" package,
    # WARNING: clear text password
    # The password is shell-quoted: it is interpolated into a bash command line, so without
    # quoting any whitespace or shell metacharacter in it would be interpreted by bash.
    set_password_command = [
        "bash",
        "-c",
        "echo %s | passwd --stdin %s" % (shell_quote(pcs_password), PCS_USER),
    ]
    if fqdn is not None:
        error = AgentShell.run_canned_error_message(
            ["hostnamectl", "set-hostname", fqdn])
        if error:
            return agent_error(error)

    return agent_ok_or_error(
        AgentShell.run_canned_error_message(set_password_command) or
        firewall_control.add_rule(mcast_port, "udp", "corosync", persist=True)
        or firewall_control.add_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True)
        or pcsd_service.start() or corosync_service.enable()
        or pcsd_service.enable())
Ejemplo n.º 21
0
def import_target(device_type, path, pacemaker_ha_operation):
    """
    Import the device at ``path`` through a BlockDevice of the given type.

    The import is first attempted unforced. If that reports an error mentioning
    '-f' and the call was made on behalf of pacemaker, it is retried forced.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: truthy when the import is requested by pacemaker; this is
               the only case in which a forced import is attempted.
    :return: the value of agent_ok_or_error() for the last error seen (None on success)
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    # Only escalate to a forced import when the failure mentions '-f' and pacemaker asked for it.
    if error and '-f' in error and pacemaker_ha_operation:
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)

    return agent_ok_or_error(error)
Ejemplo n.º 22
0
def enable_pacemaker():
    """Enable the pacemaker service and report the outcome through agent_ok_or_error()."""
    enable_error = pacemaker_service.enable()

    return agent_ok_or_error(enable_error)
Ejemplo n.º 23
0
def stop_pacemaker():
    """Stop the pacemaker service and report the outcome through agent_ok_or_error()."""
    stop_error = pacemaker_service.stop()

    return agent_ok_or_error(stop_error)
Ejemplo n.º 24
0
def stop_corosync2():
    """Stop the corosync service and report the outcome through agent_ok_or_error()."""
    stop_error = corosync_service.stop()

    return agent_ok_or_error(stop_error)
Ejemplo n.º 25
0
def open_firewall(port, address, proto, description, persist):
    """
    Add a firewall rule through a freshly created FirewallControl.

    Note the argument order handed to add_rule: port, proto, description, persist, address.

    :return: the value of agent_ok_or_error() for the add_rule result
    """
    controller = FirewallControl.create()
    add_error = controller.add_rule(port, proto, description, persist, address)

    return agent_ok_or_error(add_error)
Ejemplo n.º 26
0
def start_corosync2():
    """
    Enable the corosync service and, if that reports no error, start it.

    :return: the value of agent_ok_or_error() for the first error encountered, if any
    """
    error = corosync_service.enable()

    if not error:
        error = corosync_service.start()

    return agent_ok_or_error(error)
Ejemplo n.º 27
0
def _start_service():
    """Start the agent service and report the outcome through agent_ok_or_error()."""
    start_error = agent_service.start()

    return agent_ok_or_error(start_error)
Ejemplo n.º 28
0
def _disable_service():
    """Disable the agent service and report the outcome through agent_ok_or_error()."""
    disable_error = agent_service.disable()

    return agent_ok_or_error(disable_error)
Ejemplo n.º 29
0
def _enable_service():
    """Enable the agent service and report the outcome through agent_ok_or_error()."""
    enable_error = agent_service.enable()

    return agent_ok_or_error(enable_error)
Ejemplo n.º 30
0
def _stop_service():
    """Stop the agent service and report the outcome through agent_ok_or_error()."""
    stop_error = agent_service.stop()

    return agent_ok_or_error(stop_error)