Example #1
def config_show(lib, argv, modifiers):
    """
    Options:
      * -f - CIB file, when getting cluster name on remote node (corosync.conf
        doesn't exist)
      * --corosync_conf - corosync.conf file
    """
    modifiers.ensure_only_supported("-f", "--corosync_conf")
    if argv:
        raise CmdLineInputError()
    print("Cluster Name: %s" % utils.getClusterName())
    status.nodes_status(lib, ["config"], modifiers.get_subset("-f"))
    print()
    print("\n".join(_config_show_cib_lines(lib)))
    if (utils.hasCorosyncConf() and not modifiers.is_specified("-f")
            and not modifiers.is_specified("--corosync_conf")):
        cluster.cluster_uidgid(lib, [],
                               modifiers.get_subset(),
                               silent_list=True)
    if modifiers.is_specified("--corosync_conf") or utils.hasCorosyncConf():
        print()
        print("Quorum:")
        try:
            config = lib_quorum.get_config(utils.get_lib_env())
            print("\n".join(indent(quorum.quorum_config_to_str(config))))
        except LibraryError as e:
            process_library_reports(e.args)
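
Every command in these examples receives an InputModifiers object (created via parse_args.InputModifiers, see Example #4) and starts by calling ensure_only_supported() to reject options the command does not accept. The stand-in class below is a hypothetical, simplified sketch of that contract, inferred only from the calls visible in these examples; the real pcs class carries more state and behaviour.

class MiniModifiers:
    """Hypothetical, minimal stand-in for pcs' InputModifiers (illustration only)."""

    def __init__(self, options):
        # options: mapping of option name (e.g. "-f") to its value
        self._options = dict(options)

    def ensure_only_supported(self, *supported):
        # refuse any option the command did not declare as supported
        unsupported = set(self._options) - set(supported)
        if unsupported:
            raise SystemExit(
                "Unsupported option(s): " + ", ".join(sorted(unsupported))
            )

    def is_specified(self, name):
        return name in self._options

    def get(self, name, default=None):
        return self._options.get(name, default)

    def get_subset(self, *names):
        # narrowed set of options handed down to a subcommand
        return MiniModifiers(
            {k: v for k, v in self._options.items() if k in names}
        )


modifiers = MiniModifiers({"-f": "cib.xml"})
modifiers.ensure_only_supported("-f", "--corosync_conf")
print(modifiers.is_specified("--corosync_conf"))  # False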
Example #2
def pcsd_status_cmd(
    lib: Any,
    argv: Sequence[str],
    modifiers: InputModifiers,
    dont_exit: bool = False,
) -> None:
    """
    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    # If no arguments are given, check the status of this cluster's nodes;
    # otherwise check the status of the listed nodes.
    del lib
    modifiers.ensure_only_supported("--request-timeout")
    bad_nodes = False
    if not argv:
        nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade()
        )
        if not nodes and not dont_exit:
            report_list.append(
                ReportItem.error(
                    reports.messages.CorosyncConfigNoNodesDefined()
                )
            )
        if report_list:
            process_library_reports(report_list)
        bad_nodes = _check_nodes(nodes, "  ")
    else:
        bad_nodes = _check_nodes(argv, "  ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
Example #3
def get_fence_agent_info(lib, argv, modifiers):
    """
    Options: no options
    """
    del lib
    modifiers.ensure_only_supported()
    if len(argv) != 1:
        utils.err("One parameter expected")

    agent = argv[0]
    if not agent.startswith("stonith:"):
        utils.err("Invalid fence agent name")

    runner = utils.cmd_runner()

    try:
        metadata = lib_ra.StonithAgent(runner, agent[len("stonith:"):])
        info = metadata.get_full_info()
        info["name"] = "stonith:{0}".format(info["name"])
        print(json.dumps(info))
    except lib_ra.ResourceAgentError as e:
        process_library_reports(
            [lib_ra.resource_agent_error_to_report_item(e)])
    except LibraryError as e:
        process_library_reports(e.args)
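
The only argument handling here is stripping the mandatory "stonith:" prefix before the agent name is handed to lib_ra.StonithAgent. A standalone illustration of that slice, with a hypothetical agent name and the Python 3.9+ str.removeprefix equivalent:

agent = "stonith:fence_xvm"  # hypothetical agent name, illustration only
if not agent.startswith("stonith:"):
    raise SystemExit("Invalid fence agent name")
print(agent[len("stonith:"):])          # slice used above -> 'fence_xvm'
print(agent.removeprefix("stonith:"))   # Python 3.9+ equivalent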
Example #4
def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Options:
      * --force - do not ask for confirmation of this potentially dangerous
        action
    """
    modifiers.ensure_only_supported("--force")
    if argv:
        raise CmdLineInputError()

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"])
    if retval != 0:
        utils.err("unable to check quorum status")
    if output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    all_nodes, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade())
    if report_list:
        process_library_reports(report_list)

    unjoined_nodes = set(all_nodes) - set(utils.getCorosyncActiveNodes())
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")
    if not modifiers.get("--force"):
        answer = utils.get_terminal_input(
            ("WARNING: If node(s) {nodes} are not powered off or they do" +
             " have access to shared resources, data corruption and/or" +
             " cluster failure may occur. Are you sure you want to" +
             " continue? [y/N] ").format(nodes=", ".join(unjoined_nodes)))
        if answer.lower() not in ["y", "yes"]:
            print("Canceled")
            return
    for node in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(lib, [node],
                                parse_args.InputModifiers({"--force": ""}))

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"])
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    # Write startup-fencing twice (flipped, then back to its original value)
    # so the property is rewritten in the CIB even though its effective value
    # does not change.
    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
Example #5
def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Options:
      * --force - do not ask for confirmation of this potentially dangerous
        action
    """
    modifiers.ensure_only_supported("--force")
    if argv:
        raise CmdLineInputError()

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"])
    if (retval == 1 and "Error CS_ERR_NOT_EXIST" in output) or (
            retval == 0 and output.rsplit("=", maxsplit=1)[-1].strip() != "1"):
        utils.err("cluster is not waiting for nodes to establish quorum")
    if retval != 0:
        utils.err("unable to check quorum status")

    all_nodes, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade())
    if report_list:
        process_library_reports(report_list)

    unjoined_nodes = set(all_nodes) - set(utils.getCorosyncActiveNodes())
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")
    if not utils.get_continue_confirmation_or_force(
            f"If node(s) {format_list(unjoined_nodes)} are not powered off or they "
            "do have access to shared resources, data corruption and/or cluster "
            "failure may occur",
            modifiers.get("--force"),
    ):
        return
    for node in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(lib, [node],
                                parse_args.InputModifiers({"--force": ""}))

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"])
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print_to_stderr("Quorum unblocked")

    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print_to_stderr("Waiting for nodes canceled")
Example #6
def cluster_pcsd_status(lib, argv, modifiers, dont_exit=False):
    """
    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    del lib
    modifiers.ensure_only_supported("--request-timeout")
    bad_nodes = False
    if not argv:
        nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade())
        if not nodes and not dont_exit:
            report_list.append(
                ReportItem.error(
                    reports.messages.CorosyncConfigNoNodesDefined()))
        if report_list:
            process_library_reports(report_list)
        bad_nodes = check_nodes(nodes, "  ")
    else:
        bad_nodes = check_nodes(argv, "  ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
Example #7
def nodes_status(lib, argv, modifiers):
    """
    Options:
      * -f - CIB file - only for the config subcommand (not for both or corosync)
      * --corosync_conf - only for config subcommand

    NOTE: modifiers check is in subcommand
    """
    del lib
    if len(argv) == 1 and (argv[0] == "config"):
        modifiers.ensure_only_supported("-f", "--corosync_conf")
        if utils.hasCorosyncConf():
            corosync_nodes, report_list = get_existing_nodes_names(
                utils.get_corosync_conf_facade())
            if report_list:
                process_library_reports(report_list)
        else:
            corosync_nodes = []
        try:
            pacemaker_nodes = sorted([
                node.attrs.name for node in ClusterState(
                    get_cluster_status_dom(
                        utils.cmd_runner())).node_section.nodes
                if node.attrs.type != "remote"
            ])
        except LibraryError as e:
            process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))

        return

    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        modifiers.ensure_only_supported()
        all_nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade())
        if report_list:
            process_library_reports(report_list)
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)

        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        if argv[0] != "both":
            sys.exit(0)

    modifiers.ensure_only_supported("-f")
    info_dom = utils.getClusterState()

    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    standbynodes_with_resources = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_standbynodes_with_resources = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            if node.getAttribute("standby") == "true":
                is_running_resources = (node.getAttribute("resources_running")
                                        != "0")
                if node_remote:
                    if is_running_resources:
                        remote_standbynodes_with_resources.append(node_name)
                    else:
                        remote_standbynodes.append(node_name)
                else:
                    if is_running_resources:
                        standbynodes_with_resources.append(node_name)
                    else:
                        standbynodes.append(node_name)
            if node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            if (node.getAttribute("standby") == "false"
                    and node.getAttribute("maintenance") == "false"):
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join([" Standby with resource(s) running:"] +
                   standbynodes_with_resources))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))

    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join([" Standby with resource(s) running:"] +
                   remote_standbynodes_with_resources))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))
Example #8
def main(argv=None):
    # pylint: disable=global-statement
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    if completion.has_applicable_environment(os.environ):
        print(
            completion.make_suggestions(
                os.environ, usage.generate_completion_tree_from_usage()
            )
        )
        sys.exit()

    argv = argv if argv else sys.argv[1:]
    utils.subprocess_setup()
    global filename, usefile
    utils.pcs_options = {}

    # We want to support an optional argument for --wait, so if a value is
    # given with --wait (i.e. --wait=30), remember it and normalize the flag.
    waitsecs = None
    new_argv = []
    for arg in argv:
        if arg.startswith("--wait="):
            tempsecs = arg.replace("--wait=", "")
            if tempsecs:
                waitsecs = tempsecs
                arg = "--wait"
        new_argv.append(arg)
    argv = new_argv

    try:
        if "--" in argv:
            pcs_options, argv = getopt.gnu_getopt(
                argv, parse_args.PCS_SHORT_OPTIONS, parse_args.PCS_LONG_OPTIONS
            )
        else:
            # DEPRECATED
            # TODO remove
            # We want to support only the -- version
            (
                args_without_negative_nums,
                args_filtered_out,
            ) = parse_args.filter_out_non_option_negative_numbers(argv)
            if args_filtered_out:
                options_str = "', '".join(args_filtered_out)
                output.warn(
                    f"Using '{options_str}' without '--' is deprecated, those "
                    "parameters will be considered position independent "
                    "options in future pcs versions"
                )
            pcs_options, dummy_argv = getopt.gnu_getopt(
                args_without_negative_nums,
                parse_args.PCS_SHORT_OPTIONS,
                parse_args.PCS_LONG_OPTIONS,
            )
            argv = parse_args.filter_out_options(argv)
    except getopt.GetoptError as err:
        usage.main()
        print(err)
        if err.opt in {"V", "clone", "device", "watchdog"}:
            # Print error messages which point users to the changes section in
            # pcs manpage.
            # TODO remove
            # To be removed in the next significant version.
            print(f"Hint: {errors.HINT_SYNTAX_CHANGE}")
        sys.exit(1)

    full = False
    for option, dummy_value in pcs_options:
        if option == "--full":
            full = True
            break

    for opt, val in pcs_options:
        if opt not in utils.pcs_options:
            utils.pcs_options[opt] = val
        else:
            # If any options are a list then they've been entered twice which
            # isn't valid
            utils.err("%s can only be used once" % opt)

        if opt in ("-h", "--help"):
            if not argv:
                usage.main()
                sys.exit()
            else:
                argv = [argv[0], "help"] + argv[1:]
        elif opt == "-f":
            usefile = True
            filename = val
            utils.usefile = usefile
            utils.filename = filename
        elif opt == "--corosync_conf":
            settings.corosync_conf_file = val
        elif opt == "--version":
            print(settings.pcs_version)
            if full:
                print(
                    " ".join(
                        sorted(
                            [
                                feat["id"]
                                for feat in capabilities.get_pcs_capabilities()
                            ]
                        )
                    )
                )
            sys.exit()
        elif opt == "--fullhelp":
            usage.full_usage()
            sys.exit()
        elif opt == "--wait":
            utils.pcs_options[opt] = waitsecs
        elif opt == "--request-timeout":
            request_timeout_valid = False
            try:
                timeout = int(val)
                if timeout > 0:
                    utils.pcs_options[opt] = timeout
                    request_timeout_valid = True
            except ValueError:
                pass
            if not request_timeout_valid:
                utils.err(
                    (
                        "'{0}' is not a valid --request-timeout value, use "
                        "a positive integer"
                    ).format(val)
                )

    logger = logging.getLogger("pcs")
    logger.propagate = 0
    logger.handlers = []

    if (os.getuid() != 0) and (argv and argv[0] != "help") and not usefile:
        _non_root_run(argv)
    cmd_map = {
        "resource": resource.resource_cmd,
        "cluster": cluster.cluster_cmd,
        "stonith": stonith.stonith_cmd,
        "property": prop.property_cmd,
        "constraint": constraint.constraint_cmd,
        "acl": acl.acl_cmd,
        "status": status.status_cmd,
        "config": config.config_cmd,
        "pcsd": pcsd.pcsd_cmd,
        "node": node.node_cmd,
        "quorum": quorum.quorum_cmd,
        "qdevice": qdevice.qdevice_cmd,
        "alert": alert.alert_cmd,
        "booth": booth.booth_cmd,
        "host": host.host_cmd,
        "client": client.client_cmd,
        "dr": dr.dr_cmd,
        "tag": tag.tag_cmd,
        "help": lambda lib, argv, modifiers: usage.main(),
    }
    try:
        routing.create_router(cmd_map, [])(
            utils.get_library_wrapper(), argv, utils.get_input_modifiers()
        )
    except LibraryError as e:
        process_library_reports(e.args)
    except errors.CmdLineInputError:
        if argv and argv[0] in cmd_map:
            usage.show(argv[0], [])
        else:
            usage.main()
        sys.exit(1)
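
Before getopt sees the command line, main() folds --wait=N into a bare --wait and keeps the value aside; getopt.gnu_getopt then accepts options intermixed with positional arguments. A minimal standalone sketch of that preprocessing and parsing, using made-up options instead of parse_args.PCS_SHORT_OPTIONS/PCS_LONG_OPTIONS:

import getopt

argv = ["resource", "--force", "create", "--wait=30"]  # made-up command line

# fold "--wait=30" into "--wait" and remember the value, as main() does
waitsecs = None
new_argv = []
for arg in argv:
    if arg.startswith("--wait="):
        value = arg[len("--wait="):]
        if value:
            waitsecs = value
            arg = "--wait"
    new_argv.append(arg)

# gnu_getopt allows options to appear after positional arguments
opts, positional = getopt.gnu_getopt(new_argv, "f:", ["force", "wait"])
print(opts)        # [('--force', ''), ('--wait', '')]
print(positional)  # ['resource', 'create']
print(waitsecs)    # '30'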
Example #9
def config_restore_remote(infile_name, infile_obj):
    """
    Commandline options:
      * --request-timeout - timeout for HTTP requests
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    node_list, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade(
            conf_text=extracted["corosync.conf"].decode("utf-8")))
    if report_list:
        process_library_reports(report_list)
    if not node_list:
        utils.err("no nodes found in the tarball")

    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            _status = json.loads(output)
            if (_status["corosync"] or _status["pacemaker"] or
                    # not supported by older pcsd, do not fail if not present
                    _status.get("pacemaker_remote", False)):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration." %
                    node)
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))