예제 #1
0
파일: quorum.py 프로젝트: idevat/pcs
def quorum_unblock_cmd(argv):
    if len(argv) > 0:
        usage.quorum(["unblock"])
        sys.exit(1)

    if utils.is_rhel6():
        utils.err("operation is not supported on CMAN clusters")

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if retval != 0:
        utils.err("unable to check quorum status")
    if output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    unjoined_nodes = (
        set(utils.getNodesFromCorosyncConf())
        -
        set(utils.getCorosyncActiveNodes())
    )
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")
    if "--force" not in utils.pcs_options:
        answer = utils.get_terminal_input(
            (
                "WARNING: If node(s) {nodes} are not powered off or they do"
                + " have access to shared resources, data corruption and/or"
                + " cluster failure may occur. Are you sure you want to"
                + " continue? [y/N] "
            ).format(nodes=", ".join(unjoined_nodes))
        )
        if answer.lower() not in ["y", "yes"]:
            print("Canceled")
            return
    for node in unjoined_nodes:
        stonith.stonith_confirm([node], skip_question=True)

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true"
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
예제 #2
0
def quorum_unblock_cmd(argv):
    if len(argv) > 0:
        usage.quorum(["unblock"])
        sys.exit(1)

    if utils.is_rhel6():
        utils.err("operation is not supported on CMAN clusters")

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if retval != 0:
        utils.err("unable to check quorum status")
    if output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    unjoined_nodes = (
        set(utils.getNodesFromCorosyncConf())
        -
        set(utils.getCorosyncActiveNodes())
    )
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")
    if "--force" not in utils.pcs_options:
        answer = utils.get_terminal_input(
            (
                "WARNING: If node(s) {nodes} are not powered off or they do"
                + " have access to shared resources, data corruption and/or"
                + " cluster failure may occur. Are you sure you want to"
                + " continue? [y/N] "
            ).format(nodes=", ".join(unjoined_nodes))
        )
        if answer.lower() not in ["y", "yes"]:
            print("Canceled")
            return
    for node in unjoined_nodes:
        stonith.stonith_confirm([node], skip_question=True)

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true"
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
예제 #3
0
def cluster_pcsd_status(argv, dont_exit=False):
    bad_nodes = False
    if len(argv) == 0:
        nodes = utils.getNodesFromCorosyncConf()
        if len(nodes) == 0:
            if utils.is_rhel6():
                utils.err("no nodes found in cluster.conf")
            else:
                utils.err("no nodes found in corosync.conf")
        bad_nodes = check_nodes(nodes, "  ")
    else:
        bad_nodes = check_nodes(argv, "  ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
예제 #4
0
파일: status.py 프로젝트: HideoYamauchi/pcs
def cluster_pcsd_status(argv, dont_exit=False):
    bad_nodes = False
    if len(argv) == 0:
        nodes = utils.getNodesFromCorosyncConf()
        if len(nodes) == 0:
            if utils.is_rhel6():
                utils.err("no nodes found in cluster.conf")
            else:
                utils.err("no nodes found in corosync.conf")
        bad_nodes = check_nodes(nodes, "  ")
    else:
        bad_nodes = check_nodes(argv, "  ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
예제 #5
0
파일: stonith.py 프로젝트: dchirikov/pcs
def stonith_level_verify():
    dom = utils.get_cib_dom()
    corosync_nodes = []
    if utils.hasCorosyncConf():
        corosync_nodes = utils.getNodesFromCorosyncConf()
    pacemaker_nodes = utils.getNodesFromPacemaker()

    fls = dom.getElementsByTagName("fencing-level")
    for fl in fls:
        node = fl.getAttribute("target")
        devices = fl.getAttribute("devices")
        for dev in devices.split(","):
            if not utils.is_stonith_resource(dev):
                utils.err("%s is not a stonith id" % dev)
        if node not in corosync_nodes and node not in pacemaker_nodes:
            utils.err("%s is not currently a node" % node)
예제 #6
0
def stonith_level_verify():
    dom = utils.get_cib_dom()
    corosync_nodes = []
    if utils.hasCorosyncConf():
        corosync_nodes = utils.getNodesFromCorosyncConf()
    pacemaker_nodes = utils.getNodesFromPacemaker()

    fls = dom.getElementsByTagName("fencing-level")
    for fl in fls:
        node = fl.getAttribute("target")
        devices = fl.getAttribute("devices")
        for dev in devices.split(","):
            if not utils.is_stonith_resource(dev):
                utils.err("%s is not a stonith id" % dev)
        if node not in corosync_nodes and node not in pacemaker_nodes:
            utils.err("%s is not currently a node" % node)
예제 #7
0
def stonith_level_add(level, node, devices):
    dom = utils.get_cib_dom()

    if not re.search(r'^\d+$', level) or re.search(r'^0+$', level):
        utils.err("invalid level '{0}', use a positive integer".format(level))
    level = level.lstrip('0')
    if "--force" not in utils.pcs_options:
        for dev in devices.split(","):
            if not utils.is_stonith_resource(dev):
                utils.err("%s is not a stonith id (use --force to override)" %
                          dev)
        corosync_nodes = []
        if utils.hasCorosyncConf():
            corosync_nodes = utils.getNodesFromCorosyncConf()
        pacemaker_nodes = utils.getNodesFromPacemaker()
        if node not in corosync_nodes and node not in pacemaker_nodes:
            utils.err("%s is not currently a node (use --force to override)" %
                      node)

    ft = dom.getElementsByTagName("fencing-topology")
    if len(ft) == 0:
        conf = dom.getElementsByTagName("configuration")[0]
        ft = dom.createElement("fencing-topology")
        conf.appendChild(ft)
    else:
        ft = ft[0]

    fls = ft.getElementsByTagName("fencing-level")
    for fl in fls:
        if fl.getAttribute("target") == node and fl.getAttribute(
                "index") == level and fl.getAttribute("devices") == devices:
            utils.err(
                "unable to add fencing level, fencing level for node: %s, at level: %s, with device: %s already exists"
                % (node, level, devices))

    new_fl = dom.createElement("fencing-level")
    ft.appendChild(new_fl)
    new_fl.setAttribute("target", node)
    new_fl.setAttribute("index", level)
    new_fl.setAttribute("devices", devices)
    new_fl.setAttribute("id",
                        utils.find_unique_id(dom, "fl-" + node + "-" + level))

    utils.replace_cib_configuration(dom)
예제 #8
0
def pcsd_sync_certs(argv, exit_after_error=True, async_restart=False):
    error = False
    nodes_sync = argv if argv else utils.getNodesFromCorosyncConf()
    nodes_restart = []

    print("Synchronizing pcsd certificates on nodes {0}...".format(
        ", ".join(nodes_sync)))
    pcsd_data = {
        "nodes": nodes_sync,
    }
    output, retval = utils.run_pcsdcli("send_local_certs", pcsd_data)
    if retval == 0 and output["status"] == "ok" and output["data"]:
        try:
            sync_result = output["data"]
            if sync_result["node_status"]:
                for node, status in sync_result["node_status"].items():
                    print("{0}: {1}".format(node, status["text"]))
                    if status["status"] == "ok":
                        nodes_restart.append(node)
                    else:
                        error = True
            if sync_result["status"] != "ok":
                error = True
                utils.err(sync_result["text"], False)
            if error and not nodes_restart:
                if exit_after_error:
                    sys.exit(1)
                else:
                    return
        except (KeyError, AttributeError):
            utils.err("Unable to communicate with pcsd", exit_after_error)
            return
    else:
        utils.err("Unable to sync pcsd certificates", exit_after_error)
        return

    print(
        "Restarting pcsd on the nodes in order to reload the certificates...")
    pcsd_restart_nodes(nodes_restart,
                       exit_after_error,
                       async_restart=async_restart)
예제 #9
0
파일: pcsd.py 프로젝트: HideoYamauchi/pcs
def pcsd_sync_certs(argv, exit_after_error=True):
    error = False
    nodes_sync = argv if argv else utils.getNodesFromCorosyncConf()
    nodes_restart = []

    print("Synchronizing pcsd certificates on nodes {0}...".format(
        ", ".join(nodes_sync)
    ))
    pcsd_data = {
        "nodes": nodes_sync,
    }
    output, retval = utils.run_pcsdcli("send_local_certs", pcsd_data)
    if retval == 0 and output["status"] == "ok" and output["data"]:
        try:
            sync_result = output["data"]
            if sync_result["node_status"]:
                for node, status in sync_result["node_status"].items():
                    print("{0}: {1}".format(node, status["text"]))
                    if status["status"] == "ok":
                        nodes_restart.append(node)
                    else:
                        error = True
            if sync_result["status"] != "ok":
                error = True
                utils.err(sync_result["text"], False)
            if error and not nodes_restart:
                if exit_after_error:
                    sys.exit(1)
                else:
                    return
        except (KeyError, AttributeError):
            utils.err("Unable to communicate with pcsd", exit_after_error)
            return
    else:
        utils.err("Unable to sync pcsd certificates", exit_after_error)
        return

    print("Restarting pcsd on the nodes in order to reload the certificates...")
    pcsd_restart_nodes(nodes_restart, exit_after_error)
예제 #10
0
파일: stonith.py 프로젝트: dchirikov/pcs
def stonith_level_add(level, node, devices):
    dom = utils.get_cib_dom()

    if not re.search(r'^\d+$', level) or re.search(r'^0+$', level):
        utils.err("invalid level '{0}', use a positive integer".format(level))
    level = level.lstrip('0')
    if "--force" not in utils.pcs_options:
        for dev in devices.split(","):
            if not utils.is_stonith_resource(dev):
                utils.err("%s is not a stonith id (use --force to override)" % dev)
        corosync_nodes = []
        if utils.hasCorosyncConf():
            corosync_nodes = utils.getNodesFromCorosyncConf()
        pacemaker_nodes = utils.getNodesFromPacemaker()
        if node not in corosync_nodes and node not in pacemaker_nodes:
            utils.err("%s is not currently a node (use --force to override)" % node)

    ft = dom.getElementsByTagName("fencing-topology")
    if len(ft) == 0:
        conf = dom.getElementsByTagName("configuration")[0]
        ft = dom.createElement("fencing-topology")
        conf.appendChild(ft)
    else:
        ft = ft[0]

    fls = ft.getElementsByTagName("fencing-level")
    for fl in fls:
        if fl.getAttribute("target") == node and fl.getAttribute("index") == level and fl.getAttribute("devices") == devices:
            utils.err("unable to add fencing level, fencing level for node: %s, at level: %s, with device: %s already exists" % (node,level,devices))

    new_fl = dom.createElement("fencing-level")
    ft.appendChild(new_fl)
    new_fl.setAttribute("target", node)
    new_fl.setAttribute("index", level)
    new_fl.setAttribute("devices", devices)
    new_fl.setAttribute("id", utils.find_unique_id(dom, "fl-" + node +"-" + level))

    utils.replace_cib_configuration(dom)
예제 #11
0
파일: config.py 프로젝트: dchirikov/pcs
def config_restore_remote(infile_name, infile_obj):
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    node_list = utils.getNodesFromCorosyncConf(
        extracted["cluster.conf" if utils.is_rhel6() else "corosync.conf"].decode("utf-8")
    )
    if not node_list:
        utils.err("no nodes found in the tarball")

    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            status = json.loads(output)
            if (
                status["corosync"]
                or
                status["pacemaker"]
                or
                status["cman"]
                or
                # not supported by older pcsd, do not fail if not present
                status.get("pacemaker_remote", False)
            ):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                        "the cluster in order to restore the configuration."
                    % node
                )
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
예제 #12
0
def nodes_status(argv):
    if len(argv) == 1 and argv[0] == "pacemaker-id":
        for node_id, node_name in utils.getPacemakerNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and argv[0] == "corosync-id":
        for node_id, node_name in utils.getCorosyncNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and (argv[0] == "config"):
        if utils.hasCorosyncConf():
            corosync_nodes = utils.getNodesFromCorosyncConf()
        else:
            corosync_nodes = []
        try:
            pacemaker_nodes = sorted([
                node.attrs.name for node in ClusterState(
                    utils.getClusterStateXml()).node_section.nodes
                if node.attrs.type != 'remote'
            ])
        except LibraryError as e:
            utils.process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))

        return

    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        all_nodes = utils.getNodesFromCorosyncConf()
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)

        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        if argv[0] != "both":
            sys.exit(0)

    info_dom = utils.getClusterState()

    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            if node.getAttribute("standby") == "true":
                if node_remote:
                    remote_standbynodes.append(node_name)
                else:
                    standbynodes.append(node_name)
            elif node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            else:
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))

    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))
예제 #13
0
파일: config.py 프로젝트: rriifftt/pcs
def config_restore_remote(infile_name, infile_obj):
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    node_list = utils.getNodesFromCorosyncConf(
        extracted["cluster.conf" if utils.is_rhel6() else "corosync.conf"].decode("utf-8")
    )
    if not node_list:
        utils.err("no nodes found in the tarball")

    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            status = json.loads(output)
            if (
                status["corosync"]
                or
                status["pacemaker"]
                or
                status["cman"]
                or
                # not supported by older pcsd, do not fail if not present
                status.get("pacemaker_remote", False)
            ):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                        "the cluster in order to restore the configuration."
                    % node
                )
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
예제 #14
0
파일: status.py 프로젝트: HideoYamauchi/pcs
def nodes_status(argv):
    if len(argv) == 1 and argv[0] == "pacemaker-id":
        for node_id, node_name in utils.getPacemakerNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and argv[0] == "corosync-id":
        for node_id, node_name in utils.getCorosyncNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and (argv[0] == "config"):
        if utils.hasCorosyncConf():
            corosync_nodes = utils.getNodesFromCorosyncConf()
        else:
            corosync_nodes = []
        try:
            pacemaker_nodes = sorted([
                node.attrs.name for node
                in ClusterState(utils.getClusterStateXml()).node_section.nodes
                if node.attrs.type != 'remote'
            ])
        except LibraryError as e:
            utils.process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))

        return

    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        all_nodes = utils.getNodesFromCorosyncConf()
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)

        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        if argv[0] != "both":
            sys.exit(0)

    info_dom = utils.getClusterState()

    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            if node.getAttribute("standby") == "true":
                if node_remote:
                    remote_standbynodes.append(node_name)
                else:
                    standbynodes.append(node_name)
            elif node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            else:
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))

    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))