Example #1
def unconfigure_target_ha(primary, ha_label, uuid):
    '''
    Unconfigure the target high availability

    Return: Value using simple return protocol
    '''

    with PreservePacemakerCorosyncState():
        if get_resource_location(ha_label):
            return agent_error("cannot unconfigure-ha: %s is still running " %
                               ha_label)

        if primary:
            # Remove the location constraint first, then the resource itself.
            # Only the primitive deletion's result is checked; rc 234 (the
            # object did not exist) is tolerated during cleanup.
            cibadmin(
                ["-D", "-X",
                 "<rsc_location id=\"%s-primary\">" % ha_label])
            result = cibadmin(["-D", "-X", "<primitive id=\"%s\">" % ha_label])

            if result.rc != 0 and result.rc != 234:
                return agent_error("Error %s trying to cleanup resource %s" %
                                   (result.rc, ha_label))

        else:
            result = cibadmin(
                ["-D", "-X",
                 "<rsc_location id=\"%s-secondary\">" % ha_label])

        return agent_result_ok
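
All of these snippets call a cibadmin() helper that is defined elsewhere in the codebase. A minimal stand-in for experimenting with the examples, assuming only that callers need an object exposing .rc, .stdout and .stderr (the real helper may also raise on failure, as the try/except blocks in later examples suggest):

import collections
import subprocess

CibResult = collections.namedtuple("CibResult", ["rc", "stdout", "stderr"])

def cibadmin(args):
    # Shell out to pacemaker's cibadmin CLI, capturing the exit code so
    # callers can distinguish e.g. "object not found" from success.
    proc = subprocess.Popen(["cibadmin"] + list(args),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return CibResult(proc.returncode, stdout, stderr)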
Example #2
def _do_configure_pacemaker(pc):
    # ignoring quorum should only be done on clusters of 2
    if len(pc.nodes) > 2:
        no_quorum_policy = "stop"
    else:
        no_quorum_policy = "ignore"

    error = _unconfigure_fencing()

    if error:
        return error

    # This can race with other cluster members, so check that any error is
    # only because the resource already exists.
    try:
        cibadmin([
            "--create",
            "-o",
            "resources",
            "-X",
            '<primitive class="stonith" id="st-fencing" type="fence_chroma"/>',
        ])
    except Exception as e:
        rc, stdout, stderr = AgentShell.run_old(
            ["crm_resource", "--locate", "--resource", "st-fencing"])
        if rc == 0:  # no need to do the rest if another member is already doing it
            return None
        else:
            return e.message

    pc.create_update_properyset(
        "cib-bootstrap-options",
        {
            "no-quorum-policy": no_quorum_policy,
            "symmetric-cluster": "true",
            "cluster-infrastructure": "openais",
            "stonith-enabled": "true",
        },
    )

    def set_rsc_default(name, value):
        """

        :param name: attribute to set
        :param value: value to set
        :return: None if an error else a canned error message
        """
        return AgentShell.run_canned_error_message([
            "crm_attribute",
            "--type",
            "rsc_defaults",
            "--attr-name",
            name,
            "--attr-value",
            value,
        ])

    return (set_rsc_default("resource-stickiness", "1000")
            or set_rsc_default("failure-timeout", RSRC_FAIL_WINDOW) or
            set_rsc_default("migration-threshold", RSRC_FAIL_MIGRATION_COUNT))
Example #3
def _unconfigure_fencing():
    try:
        cibadmin([
            "--delete", "-o", "resources", "-X",
            "<primitive class=\"stonith\" id=\"st-fencing\" type=\"fence_chroma\"/>"
        ])

        return None
    except Exception as e:
        return e.message
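
Because the helper converts exceptions into returned messages, callers inspect its return value instead of wrapping it in try/except. A minimal usage sketch:

error = _unconfigure_fencing()
if error:
    print("failed to remove the st-fencing resource: %s" % error)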
Example #4
def unconfigure_pacemaker():
    # only unconfigure if we are the only node in the cluster
    # but first, see if pacemaker is up to answer this
    if not _pacemaker_running():
        # and just skip doing this if it's not
        return agent_result_ok

    if _get_cluster_size() < 2:
        # last node standing: erase the entire CIB (-E), forced (-f)
        cibadmin(["-f", "-E"])

    return agent_ok_or_error(pacemaker_service.stop()
                             or pacemaker_service.disable())
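
unconfigure_pacemaker() depends on two helpers that are not shown. A plausible sketch of both; the `crm_node -l` output format and the pacemaker_service.running attribute are assumptions here, not the codebase's confirmed API:

def _pacemaker_running():
    # Assumption: the pacemaker_service wrapper used above reports its state.
    return pacemaker_service.running

def _get_cluster_size():
    # Count the nodes known to the cluster; assumes `crm_node -l` prints one
    # "<id> <uname> <status>" line per node.
    rc, stdout, stderr = AgentShell.run_old(["crm_node", "-l"])
    return len([line for line in stdout.splitlines() if line.strip()])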
Example #5
def delete_node(nodename):
    # Look up the numeric node id for nodename; `crm_node -l` prints one
    # "<id> <uname> <status>" line per known node.
    rc, stdout, stderr = AgentShell.run_old(['crm_node', '-l'])
    node_id = None
    for line in stdout.split('\n'):
        fields = line.split(" ")
        if len(fields) == 3 and fields[1] == nodename:
            node_id = fields[0]
            break
    # Remove the node from cluster membership, then purge its <node> entry
    # and its <node_state> record from the CIB.
    AgentShell.try_run(['crm_node', '--force', '-R', node_id])
    cibadmin(
        ["--delete", "-o", "nodes", "-X",
         "<node uname=\"%s\"/>" % nodename])
    cibadmin([
        "--delete", "-o", "nodes", "--crm_xml",
        "<node_state uname=\"%s\"/>" % nodename
    ])
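
A usage sketch; the hostname is a placeholder and must match the node's uname as reported by `crm_node -l`:

delete_node("lotus-33vm15")  # placeholder hostname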
Example #6
def configure_target_ha(primary, device, ha_label, uuid, mount_point):
    '''
    Configure the target high availability

    Return: Value using simple return protocol
    '''

    if primary:
        # If the target already exists with the same params, skip.
        # If it already exists with different params, that is an error
        rc, stdout, stderr = AgentShell.run_old(
            ["crm_resource", "-r", ha_label, "-g", "target"])
        if rc == 0:
            info = _get_target_config(stdout.rstrip("\n"))
            if info['bdev'] == device and info['mntpt'] == mount_point:
                return agent_result_ok
            else:
                return agent_error(
                    "A resource with the name %s already exists" % ha_label)

        tmp_f, tmp_name = tempfile.mkstemp()
        # Write the <primitive> definition for the Target resource; the
        # resource id, child ids and the target uuid are interpolated below.
        os.write(tmp_f, """\
<primitive class="ocf" provider="chroma" type="Target" id="%(label)s">
  <meta_attributes id="%(label)s-meta_attributes">
    <nvpair name="target-role" id="%(label)s-meta_attributes-target-role" value="Stopped"/>
  </meta_attributes>
  <operations id="%(label)s-operations">
    <op id="%(label)s-monitor-5" interval="5" name="monitor" timeout="60"/>
    <op id="%(label)s-start-0" interval="0" name="start" timeout="300"/>
    <op id="%(label)s-stop-0" interval="0" name="stop" timeout="300"/>
  </operations>
  <instance_attributes id="%(label)s-instance_attributes">
    <nvpair id="%(label)s-instance_attributes-target" name="target" value="%(uuid)s"/>
  </instance_attributes>
</primitive>""" % {"label": ha_label, "uuid": uuid})
        os.close(tmp_f)

        cibadmin(["-o", "resources", "-C", "-x", "%s" % tmp_name])
        score = 20
        preference = "primary"
    else:
        score = 10
        preference = "secondary"

    # Hostname: this is a short-term point fix to make the HP2 release more
    # functional. Somewhere between el6 and el7 (really we should be checking
    # the Pacemaker or Corosync version), Pacemaker switched from bare
    # nodenames to fully qualified domain names, e.g.
    # lotus-33vm15.lotus.hpdd.lab.intel.com vs. lotus-33vm15. To keep
    # compatibility simple, the constraints must follow the same
    # FQDN-vs-nodename convention.
    if platform_info.distro_version >= 7.0:
        node = socket.getfqdn()
    else:
        node = os.uname()[1]

    result = cibadmin([
        "-o", "constraints", "-C", "-X",
        "<rsc_location id=\"%s-%s\" node=\"%s\" rsc=\"%s\" score=\"%s\"/>" %
        (ha_label, preference, node, ha_label, score)
    ])

    if result.rc == 76:
        return agent_error("A constraint with the name %s-%s already exists" %
                           (ha_label, preference))

    _mkdir_p_concurrent(mount_point)

    return agent_result_ok
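
A usage sketch for the primary node; the device path, label, UUID and mount point below are placeholders:

result = configure_target_ha(
    primary=True,
    device="/dev/mapper/mpatha",                  # placeholder block device
    ha_label="MGS_1",                             # placeholder resource label
    uuid="c8a7f3e0-9f2b-4d55-8d1a-3c5e2f7b9a01",  # placeholder target uuid
    mount_point="/mnt/MGS_1",                     # placeholder mount point
)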