def config_show(lib, argv, modifiers):
    """
    Options:
      * -f - CIB file, when getting cluster name on remote node
        (corosync.conf doesn't exist)
      * --corosync_conf - corosync.conf file
    """
    modifiers.ensure_only_supported("-f", "--corosync_conf")
    if argv:
        raise CmdLineInputError()
    print("Cluster Name: %s" % utils.getClusterName())
    status.nodes_status(lib, ["config"], modifiers.get_subset("-f"))
    print()
    print("\n".join(_config_show_cib_lines(lib)))
    if (
        utils.hasCorosyncConf()
        and not modifiers.is_specified("-f")
        and not modifiers.is_specified("--corosync_conf")
    ):
        cluster.cluster_uidgid(
            lib, [], modifiers.get_subset(), silent_list=True
        )
    if modifiers.is_specified("--corosync_conf") or utils.hasCorosyncConf():
        print()
        print("Quorum:")
        try:
            config = lib_quorum.get_config(utils.get_lib_env())
            print("\n".join(indent(quorum.quorum_config_to_str(config))))
        except LibraryError as e:
            process_library_reports(e.args)

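# The indent() helper used in config_show above is imported from pcs' CLI
# utilities and is not part of this excerpt. A minimal sketch of the expected
# behavior (an assumption, not the pcs implementation): prefix every
# non-empty line with a fixed number of spaces.
def indent_sketch(line_list, indent_step=2):
    """Return lines prefixed with indent_step spaces; empty lines unchanged."""
    return [
        " " * indent_step + line if line else line
        for line in line_list
    ]
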
def pcsd_status_cmd(
    lib: Any,
    argv: Sequence[str],
    modifiers: InputModifiers,
    dont_exit: bool = False,
) -> None:
    """
    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    # If no arguments get current cluster node status, otherwise get listed
    # nodes status
    del lib
    modifiers.ensure_only_supported("--request-timeout")
    bad_nodes = False
    if not argv:
        nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade()
        )
        if not nodes and not dont_exit:
            report_list.append(
                ReportItem.error(
                    reports.messages.CorosyncConfigNoNodesDefined()
                )
            )
        if report_list:
            process_library_reports(report_list)
        bad_nodes = _check_nodes(nodes, " ")
    else:
        bad_nodes = _check_nodes(argv, " ")
    if bad_nodes and not dont_exit:
        sys.exit(2)

def get_fence_agent_info(lib, argv, modifiers):
    """
    Options: no options
    """
    del lib
    modifiers.ensure_only_supported()
    if len(argv) != 1:
        utils.err("One parameter expected")

    agent = argv[0]
    if not agent.startswith("stonith:"):
        utils.err("Invalid fence agent name")

    runner = utils.cmd_runner()

    try:
        metadata = lib_ra.StonithAgent(runner, agent[len("stonith:"):])
        info = metadata.get_full_info()
        info["name"] = "stonith:{0}".format(info["name"])
        print(json.dumps(info))
    except lib_ra.ResourceAgentError as e:
        process_library_reports(
            [lib_ra.resource_agent_error_to_report_item(e)]
        )
    except LibraryError as e:
        process_library_reports(e.args)

def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Options:
      * --force - no error when removing non existing property and no warning
        about this action
    """
    modifiers.ensure_only_supported("--force")
    if argv:
        raise CmdLineInputError()

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if (retval == 1 and "Error CS_ERR_NOT_EXIST" in output) or (
        retval == 0 and output.rsplit("=", maxsplit=1)[-1].strip() != "1"
    ):
        utils.err("cluster is not waiting for nodes to establish quorum")
    if retval != 0:
        utils.err("unable to check quorum status")

    all_nodes, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade()
    )
    if report_list:
        process_library_reports(report_list)
    unjoined_nodes = set(all_nodes) - set(utils.getCorosyncActiveNodes())
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")
    if not utils.get_continue_confirmation_or_force(
        f"If node(s) {format_list(unjoined_nodes)} are not powered off or they "
        "do have access to shared resources, data corruption and/or cluster "
        "failure may occur",
        modifiers.get("--force"),
    ):
        return
    for node in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(
            lib, [node], parse_args.InputModifiers({"--force": ""})
        )

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print_to_stderr("Quorum unblocked")

    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print_to_stderr("Waiting for nodes canceled")

def cluster_pcsd_status(lib, argv, modifiers, dont_exit=False):
    """
    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    del lib
    modifiers.ensure_only_supported("--request-timeout")
    bad_nodes = False
    if not argv:
        nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade()
        )
        if not nodes and not dont_exit:
            report_list.append(
                ReportItem.error(
                    reports.messages.CorosyncConfigNoNodesDefined()
                )
            )
        if report_list:
            process_library_reports(report_list)
        bad_nodes = check_nodes(nodes, " ")
    else:
        bad_nodes = check_nodes(argv, " ")
    if bad_nodes and not dont_exit:
        sys.exit(2)

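# Both pcsd status commands above delegate the per-node check to
# _check_nodes / check_nodes, which is not included in this excerpt.
# A minimal sketch of such a helper, assuming it reuses utils.checkStatus()
# (used further below in config_restore_remote) and returns True when any
# node cannot be contacted; the actual pcs implementation may differ.
def check_nodes_sketch(node_list, prefix=""):
    bad = False
    for node in node_list:
        retval, dummy_output = utils.checkStatus(node)
        if retval == 0:
            print("{0}{1}: Online".format(prefix, node))
        else:
            print("{0}{1}: Unable to authenticate".format(prefix, node))
            bad = True
    return bad
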
def nodes_status(lib, argv, modifiers):
    """
    Options:
      * -f - CIB file - for config subcommand and not for both or corosync
      * --corosync_conf - only for config subcommand

    NOTE: modifiers check is in subcommand
    """
    del lib
    if len(argv) == 1 and (argv[0] == "config"):
        modifiers.ensure_only_supported("-f", "--corosync_conf")
        if utils.hasCorosyncConf():
            corosync_nodes, report_list = get_existing_nodes_names(
                utils.get_corosync_conf_facade()
            )
            if report_list:
                process_library_reports(report_list)
        else:
            corosync_nodes = []
        try:
            pacemaker_nodes = sorted(
                [
                    node.attrs.name
                    for node in ClusterState(
                        get_cluster_status_dom(utils.cmd_runner())
                    ).node_section.nodes
                    if node.attrs.type != "remote"
                ]
            )
        except LibraryError as e:
            process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))
        return

    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        modifiers.ensure_only_supported()
        all_nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade()
        )
        if report_list:
            process_library_reports(report_list)
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)
        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        if argv[0] != "both":
            sys.exit(0)

    modifiers.ensure_only_supported("-f")
    info_dom = utils.getClusterState()
    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    standbynodes_with_resources = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_standbynodes_with_resources = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            if node.getAttribute("standby") == "true":
                is_running_resources = (
                    node.getAttribute("resources_running") != "0"
                )
                if node_remote:
                    if is_running_resources:
                        remote_standbynodes_with_resources.append(node_name)
                    else:
                        remote_standbynodes.append(node_name)
                else:
                    if is_running_resources:
                        standbynodes_with_resources.append(node_name)
                    else:
                        standbynodes.append(node_name)
            if node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            if (
                node.getAttribute("standby") == "false"
                and node.getAttribute("maintenance") == "false"
            ):
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join(
        [" Standby with resource(s) running:"] + standbynodes_with_resources
    ))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))

    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join(
        [" Standby with resource(s) running:"]
        + remote_standbynodes_with_resources
    ))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))

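# Sample output of the default node listing above, assembled from the print
# calls in nodes_status (node names are illustrative only):
#
#   Pacemaker Nodes:
#    Online: node1 node2
#    Standby:
#    Standby with resource(s) running:
#    Maintenance:
#    Offline: node3
#   Pacemaker Remote Nodes:
#    Online: remote1
#    Standby:
#    Standby with resource(s) running:
#    Maintenance:
#    Offline:
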
def main(argv=None):
    # pylint: disable=global-statement
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    if completion.has_applicable_environment(os.environ):
        print(
            completion.make_suggestions(
                os.environ, usage.generate_completion_tree_from_usage()
            )
        )
        sys.exit()

    argv = argv if argv else sys.argv[1:]
    utils.subprocess_setup()
    global filename, usefile
    utils.pcs_options = {}

    # we want to support optional arguments for --wait, so if an argument
    # is specified with --wait (ie. --wait=30) then we use them
    waitsecs = None
    new_argv = []
    for arg in argv:
        if arg.startswith("--wait="):
            tempsecs = arg.replace("--wait=", "")
            if tempsecs:
                waitsecs = tempsecs
                arg = "--wait"
        new_argv.append(arg)
    argv = new_argv

    try:
        if "--" in argv:
            pcs_options, argv = getopt.gnu_getopt(
                argv, parse_args.PCS_SHORT_OPTIONS, parse_args.PCS_LONG_OPTIONS
            )
        else:
            # DEPRECATED
            # TODO remove
            # We want to support only the -- version
            (
                args_without_negative_nums,
                args_filtered_out,
            ) = parse_args.filter_out_non_option_negative_numbers(argv)
            if args_filtered_out:
                options_str = "', '".join(args_filtered_out)
                output.warn(
                    f"Using '{options_str}' without '--' is deprecated, those "
                    "parameters will be considered position independent "
                    "options in future pcs versions"
                )
            pcs_options, dummy_argv = getopt.gnu_getopt(
                args_without_negative_nums,
                parse_args.PCS_SHORT_OPTIONS,
                parse_args.PCS_LONG_OPTIONS,
            )
            argv = parse_args.filter_out_options(argv)
    except getopt.GetoptError as err:
        usage.main()
        print(err)
        if err.opt in {"V", "clone", "device", "watchdog"}:
            # Print error messages which point users to the changes section in
            # pcs manpage.
            # TODO remove
            # To be removed in the next significant version.
            print(f"Hint: {errors.HINT_SYNTAX_CHANGE}")
        sys.exit(1)

    full = False
    for option, dummy_value in pcs_options:
        if option == "--full":
            full = True
            break

    for opt, val in pcs_options:
        if not opt in utils.pcs_options:
            utils.pcs_options[opt] = val
        else:
            # If any options are a list then they've been entered twice which
            # isn't valid
            utils.err("%s can only be used once" % opt)

        if opt in ("-h", "--help"):
            if not argv:
                usage.main()
                sys.exit()
            else:
                argv = [argv[0], "help"] + argv[1:]
        elif opt == "-f":
            usefile = True
            filename = val
            utils.usefile = usefile
            utils.filename = filename
        elif opt == "--corosync_conf":
            settings.corosync_conf_file = val
        elif opt == "--version":
            print(settings.pcs_version)
            if full:
                print(
                    " ".join(
                        sorted(
                            [
                                feat["id"]
                                for feat in capabilities.get_pcs_capabilities()
                            ]
                        )
                    )
                )
            sys.exit()
        elif opt == "--fullhelp":
            usage.full_usage()
            sys.exit()
        elif opt == "--wait":
            utils.pcs_options[opt] = waitsecs
        elif opt == "--request-timeout":
            request_timeout_valid = False
            try:
                timeout = int(val)
                if timeout > 0:
                    utils.pcs_options[opt] = timeout
                    request_timeout_valid = True
            except ValueError:
                pass
            if not request_timeout_valid:
                utils.err(
                    (
                        "'{0}' is not a valid --request-timeout value, use "
                        "a positive integer"
                    ).format(val)
                )

    logger = logging.getLogger("pcs")
    logger.propagate = 0
    logger.handlers = []

    if (os.getuid() != 0) and (argv and argv[0] != "help") and not usefile:
        _non_root_run(argv)

    cmd_map = {
        "resource": resource.resource_cmd,
        "cluster": cluster.cluster_cmd,
        "stonith": stonith.stonith_cmd,
        "property": prop.property_cmd,
        "constraint": constraint.constraint_cmd,
        "acl": acl.acl_cmd,
        "status": status.status_cmd,
        "config": config.config_cmd,
        "pcsd": pcsd.pcsd_cmd,
        "node": node.node_cmd,
        "quorum": quorum.quorum_cmd,
        "qdevice": qdevice.qdevice_cmd,
        "alert": alert.alert_cmd,
        "booth": booth.booth_cmd,
        "host": host.host_cmd,
        "client": client.client_cmd,
        "dr": dr.dr_cmd,
        "tag": tag.tag_cmd,
        "help": lambda lib, argv, modifiers: usage.main(),
    }
    try:
        routing.create_router(cmd_map, [])(
            utils.get_library_wrapper(), argv, utils.get_input_modifiers()
        )
    except LibraryError as e:
        process_library_reports(e.args)
    except errors.CmdLineInputError:
        if argv and argv[0] in cmd_map:
            usage.show(argv[0], [])
        else:
            usage.main()
        sys.exit(1)

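# Every value in cmd_map above is a callable taking (lib, argv, modifiers);
# routing.create_router dispatches to it based on the first positional
# argument. A hypothetical sketch of how a new top-level command would plug
# in -- "example" and example_cmd are illustrative only, not part of pcs:
def example_cmd(lib, argv, modifiers):
    """
    Options: no options
    """
    del lib
    modifiers.ensure_only_supported()
    if argv:
        raise errors.CmdLineInputError()
    print("example output")
# ...and it would be registered alongside the other commands:
#     "example": example_cmd,
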
def config_restore_remote(infile_name, infile_obj):
    """
    Commandline options:
      * --request-timeout - timeout for HTTP requests
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    node_list, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade(
            conf_text=extracted["corosync.conf"].decode("utf-8")
        )
    )
    if report_list:
        process_library_reports(report_list)
    if not node_list:
        utils.err("no nodes found in the tarball")

    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            _status = json.loads(output)
            if (
                _status["corosync"]
                or _status["pacemaker"]
                or
                # not supported by older pcsd, do not fail if not present
                _status.get("pacemaker_remote", False)
            ):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration." % node
                )
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
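
# The per-node status string returned by utils.checkStatus() is parsed as
# JSON in config_restore_remote above; only the keys below are consulted
# (an illustrative shape, other fields returned by pcsd are omitted):
#
#   {
#       "corosync": false,
#       "pacemaker": false,
#       "pacemaker_remote": false
#   }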