def cluster_node(argv): if len(argv) != 2: usage.cluster() sys.exit(1) if argv[0] == "add": add_node = True elif argv[0] == "remove": add_node = False else: usage.cluster() sys.exit(1) node = argv[1] status, output = utils.checkStatus(node) if status == 2: print "Error: pcsd is not running on %s" % node sys.exit(1) elif status == 3: print "Error: %s is not yet authenticated (try pcs cluster auth %s)" % ( node, node) sys.exit(1) if add_node == True: corosync_conf = None for my_node in utils.getNodesFromCorosyncConf(): retval, output = utils.addLocalNode(my_node, node) if retval != 0: print "Error: unable to add %s on %s - %s" % (node, my_node, output.strip()) else: print "%s: Corosync updated" % my_node corosync_conf = output if corosync_conf != None: utils.setCorosyncConfig(node, corosync_conf) utils.startCluster(node) else: print "Error: Unable to update any nodes" sys.exit(1) else: nodesRemoved = False output, retval = utils.run(["crm_node", "--force", "-R", node]) for my_node in utils.getNodesFromCorosyncConf(): retval, output = utils.removeLocalNode(my_node, node) if retval != 0: print "Error: unable to remove %s on %s - %s" % ( node, my_node, output.strip()) else: if output[0] == 0: print "%s: Corosync updated" % my_node nodesRemoved = True else: print "%s: Error executing command occured: %s" % ( my_node, "".join(output[1])) if nodesRemoved == False: print "Error: Unable to update any nodes" sys.exit(1)
def cluster_node(argv): if len(argv) != 2: usage.cluster(); sys.exit(1) if argv[0] == "add": add_node = True elif argv[0] == "remove": add_node = False else: usage.cluster(); sys.exit(1) node = argv[1] status,output = utils.checkStatus(node) if status == 2: print "Error: pcsd is not running on %s" % node sys.exit(1) elif status == 3: print "Error: %s is not yet authenticated (try pcs cluster auth %s)" % (node, node) sys.exit(1) if add_node == True: corosync_conf = None for my_node in utils.getNodesFromCorosyncConf(): retval, output = utils.addLocalNode(my_node,node) if retval != 0: print "Error: unable to add %s on %s - %s" % (node,my_node,output.strip()) else: print "%s: Corosync updated" % my_node corosync_conf = output if corosync_conf != None: utils.setCorosyncConfig(node, corosync_conf) utils.startCluster(node) else: print "Error: Unable to update any nodes" sys.exit(1) else: nodesRemoved = False output, retval = utils.run(["crm_node", "--force","-R", node]) for my_node in utils.getNodesFromCorosyncConf(): retval, output = utils.removeLocalNode(my_node,node) if retval != 0: print "Error: unable to remove %s on %s - %s" % (node,my_node,output.strip()) else: if output[0] == 0: print "%s: Corosync updated" % my_node nodesRemoved = True else: print "%s: Error executing command occured: %s" % (my_node, "".join(output[1])) if nodesRemoved == False: print "Error: Unable to update any nodes" sys.exit(1)
def cluster_destroy(argv): if "--all" in utils.pcs_options: threads = {} for node in utils.getNodesFromCorosyncConf(): threads[node] = DestroyClusterThread(node) threads[node].start() for thread in threads.values(): thread.join() else: print "Shutting down pacemaker/corosync services..." print os.system("service pacemaker stop") print os.system("service corosync stop") print "Killing any remaining services..." os.system("killall -q -9 corosync aisexec heartbeat pacemakerd ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld") utils.disableServices() print "Removing all cluster configuration files..." if utils.is_rhel6(): os.system("rm /etc/cluster/cluster.conf") else: os.system("rm /etc/corosync/corosync.conf") state_files = ["cib.xml*", "cib-*", "core.*", "hostcache", "cts.*", "pe*.bz2","cib.*"] for name in state_files: os.system("find /var/lib -name '"+name+"' -exec rm -f \{\} \;")
def node_standby(argv, standby=True):
    """Put nodes into standby mode, or take them out of it.

    Args:
        argv: command arguments; argv[0] names the node to change unless
            "--all" is present in utils.pcs_options.
        standby: True runs ``crm_standby -v on``, False runs
            ``crm_standby -D``.

    Without a node name and without "--all" the matching usage text is
    printed and the process exits with status 1.  A node name that is not
    in the corosync configuration is reported through utils.err().
    """
    every_node = "--all" in utils.pcs_options
    if len(argv) == 0 and not every_node:
        usage.cluster(["standby" if standby else "unstandby"])
        sys.exit(1)

    nodes = utils.getNodesFromCorosyncConf()
    if every_node:
        targets = nodes
    else:
        # Use the requested name directly instead of relying on a loop
        # variable that leaks out of a search loop.
        if argv[0] not in nodes:
            # NOTE(review): presumably utils.err() terminates the program.
            utils.err(
                "node '%s' does not appear to exist in configuration"
                % argv[0])
        targets = [argv[0]]

    if standby:
        base_cmd = ["crm_standby", "-v", "on", "-N"]
    else:
        base_cmd = ["crm_standby", "-D", "-N"]
    for node in targets:
        utils.run(base_cmd + [node])
def cluster_cmd(argv):
    """Dispatch a cluster sub-command.

    The first element of argv is removed and used as the sub-command name;
    the remaining elements are handed to the handlers that take arguments.
    An empty argv prints the usage text and exits with status 1; an unknown
    sub-command only prints the usage text.
    """
    if not argv:
        usage.cluster()
        exit(1)

    sub_cmd = argv.pop(0)
    # Lambdas keep every handler name lazily resolved, exactly like an
    # if/elif chain would.
    handlers = {
        "help": lambda: usage.cluster(),
        "configure": lambda: corosync_configure(argv),
        "sync": lambda: sync_nodes(
            utils.getNodesFromCorosyncConf(), utils.getCorosyncConf()),
        "gui-status": lambda: cluster_gui_status(argv),
        "auth": lambda: cluster_auth(argv),
        "token": lambda: cluster_token(argv),
        "start": lambda: start_cluster(argv),
        "stop": lambda: stop_cluster(argv),
        "startall": lambda: start_cluster_all(),
        "stopall": lambda: stop_cluster_all(),
    }
    handler = handlers.get(sub_cmd)
    if handler is None:
        usage.cluster()
    else:
        handler()
def cluster_destroy(argv): if "--all" in utils.pcs_options: threads = {} for node in utils.getNodesFromCorosyncConf(): threads[node] = DestroyClusterThread(node) threads[node].start() for thread in threads.values(): thread.join() else: print "Shutting down pacemaker/corosync services..." print os.system("service pacemaker stop") print os.system("service corosync stop") print "Killing any remaining services..." os.system( "killall -q -9 corosync aisexec heartbeat pacemakerd ccm stonithd ha_logd lrmd crmd pengine attrd pingd mgmtd cib fenced dlm_controld gfs_controld" ) print "Removing all cluster configuration files..." if utils.is_rhel6(): os.system("rm /etc/cluster/cluster.conf") else: os.system("rm /etc/corosync/corosync.conf") state_files = [ "cib.xml*", "cib-*", "core.*", "hostcache", "cts.*", "pe*.bz2", "cib.*" ] for name in state_files: os.system("find /var/lib -name '" + name + "' -exec rm -f \{\} \;")
def node_standby(argv, standby=True):
    """Put nodes into standby mode, or take them out of it.

    Args:
        argv: command arguments; argv[0] names the node to change unless
            "--all" is present in utils.pcs_options.
        standby: True runs ``crm_standby -v on``, False runs
            ``crm_standby -D``.

    Without a node name and without "--all" the matching usage text is
    printed and the process exits with status 1.  A node name that is not
    in the corosync configuration is reported through utils.err().
    """
    every_node = "--all" in utils.pcs_options
    if len(argv) == 0 and not every_node:
        usage.cluster(["standby" if standby else "unstandby"])
        sys.exit(1)

    nodes = utils.getNodesFromCorosyncConf()
    if every_node:
        targets = nodes
    else:
        # Use the requested name directly instead of relying on a loop
        # variable that leaks out of a search loop.
        if argv[0] not in nodes:
            # NOTE(review): presumably utils.err() terminates the program.
            utils.err(
                "node '%s' does not appear to exist in configuration"
                % argv[0])
        targets = [argv[0]]

    if standby:
        base_cmd = ["crm_standby", "-v", "on", "-N"]
    else:
        base_cmd = ["crm_standby", "-D", "-N"]
    for node in targets:
        utils.run(base_cmd + [node])
def cluster_cmd(argv):
    """Dispatch a cluster sub-command.

    The first element of argv is removed and used as the sub-command name;
    the rest is passed to the selected handler.  For start/stop/enable/
    disable the "--all" option in utils.pcs_options selects the all-nodes
    variant.  An empty argv or an unknown sub-command prints the usage text
    and exits with status 1.
    """
    if len(argv) == 0:
        usage.cluster()
        # sys.exit, not the site-provided exit(): consistent with the
        # unknown-sub-command path below and available without site.
        sys.exit(1)

    sub_cmd = argv.pop(0)
    all_nodes = "--all" in utils.pcs_options

    if sub_cmd == "help":
        usage.cluster()
    elif sub_cmd == "setup":
        corosync_setup(argv)
    elif sub_cmd == "sync":
        sync_nodes(utils.getNodesFromCorosyncConf(), utils.getCorosyncConf())
    elif sub_cmd == "status":
        status.cluster_status(argv)
    elif sub_cmd == "pcsd-status":
        cluster_gui_status(argv)
    elif sub_cmd == "auth":
        cluster_auth(argv)
    elif sub_cmd == "token":
        cluster_token(argv)
    elif sub_cmd == "start":
        if all_nodes:
            start_cluster_all()
        else:
            start_cluster(argv)
    elif sub_cmd == "stop":
        if all_nodes:
            stop_cluster_all()
        else:
            stop_cluster(argv)
    elif sub_cmd == "force_stop":
        force_stop_cluster(argv)
    elif sub_cmd == "standby":
        node_standby(argv)
    elif sub_cmd == "unstandby":
        node_standby(argv, False)
    elif sub_cmd == "enable":
        if all_nodes:
            enable_cluster_all()
        else:
            enable_cluster(argv)
    elif sub_cmd == "disable":
        if all_nodes:
            disable_cluster_all()
        else:
            disable_cluster(argv)
    elif sub_cmd == "cib":
        get_cib(argv)
    elif sub_cmd == "push":
        cluster_push(argv)
    elif sub_cmd == "node":
        cluster_node(argv)
    elif sub_cmd == "localnode":
        cluster_localnode(argv)
    elif sub_cmd == "corosync":
        cluster_get_corosync_conf(argv)
    else:
        usage.cluster()
        sys.exit(1)
def cluster_cmd(argv):
    """Dispatch a cluster sub-command.

    The first element of argv is removed and used as the sub-command name;
    the rest is passed to the selected handler.  For start/stop/enable/
    disable the "--all" option in utils.pcs_options selects the all-nodes
    variant.  An empty argv or an unknown sub-command prints the usage text
    and exits with status 1.
    """
    if len(argv) == 0:
        usage.cluster()
        # sys.exit, not the site-provided exit(): consistent with the
        # unknown-sub-command path below and available without site.
        sys.exit(1)

    sub_cmd = argv.pop(0)
    all_nodes = "--all" in utils.pcs_options

    if sub_cmd == "help":
        usage.cluster()
    elif sub_cmd == "setup":
        corosync_setup(argv)
    elif sub_cmd == "sync":
        sync_nodes(utils.getNodesFromCorosyncConf(), utils.getCorosyncConf())
    elif sub_cmd == "status":
        status.cluster_status(argv)
    elif sub_cmd == "pcsd-status":
        cluster_gui_status(argv)
    elif sub_cmd == "auth":
        cluster_auth(argv)
    elif sub_cmd == "token":
        cluster_token(argv)
    elif sub_cmd == "start":
        if all_nodes:
            start_cluster_all()
        else:
            start_cluster(argv)
    elif sub_cmd == "stop":
        if all_nodes:
            stop_cluster_all()
        else:
            stop_cluster(argv)
    elif sub_cmd == "force_stop":
        force_stop_cluster(argv)
    elif sub_cmd == "standby":
        node_standby(argv)
    elif sub_cmd == "unstandby":
        node_standby(argv, False)
    elif sub_cmd == "enable":
        if all_nodes:
            enable_cluster_all()
        else:
            enable_cluster(argv)
    elif sub_cmd == "disable":
        if all_nodes:
            disable_cluster_all()
        else:
            disable_cluster(argv)
    elif sub_cmd == "cib":
        get_cib(argv)
    elif sub_cmd == "push":
        cluster_push(argv)
    elif sub_cmd == "node":
        cluster_node(argv)
    elif sub_cmd == "localnode":
        cluster_localnode(argv)
    elif sub_cmd == "corosync":
        cluster_get_corosync_conf(argv)
    else:
        usage.cluster()
        sys.exit(1)
def stop_cluster_all():
    """Stop the cluster on all nodes in parallel.

    One StopClusterThread is started per node from the corosync
    configuration; the function returns once every thread has been joined.
    """
    workers = {}
    for node in utils.getNodesFromCorosyncConf():
        worker = StopClusterThread(node)
        workers[node] = worker
        worker.start()

    for worker in workers.values():
        worker.join()
def config_restore_remote(infile_name, infile_obj):
    """Restore a configuration backup tarball onto every node it lists.

    Args:
        infile_name: path of the backup tarball; passed to tarfile.open()
            and, when infile_obj is falsy, opened to read the raw data.
        infile_obj: optional open file object holding the tarball.  It is
            rewound with seek(0) and re-read after the archive is parsed.

    The node list is taken from cluster.conf (when utils.is_rhel6() is
    true) or corosync.conf inside the tarball.  Every failure is reported
    through utils.err().
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        try:
            # TarFile.next() returns None once the archive is exhausted.
            for member_info in iter(tarball.next, None):
                if member_info.name in extracted:
                    member = tarball.extractfile(member_info)
                    extracted[member_info.name] = member.read()
                    member.close()
        finally:
            # Release the archive even when reading a member fails.
            tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    conf_name = "cluster.conf" if utils.is_rhel6() else "corosync.conf"
    node_list = utils.getNodesFromCorosyncConf(extracted[conf_name])
    if not node_list:
        utils.err("no nodes found in the tarball")

    # Refuse to restore while any cluster daemon is running on any node.
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                utils.err("unable to determine status of the node %s" % node)
            node_status = json.loads(output)
            if (
                node_status["corosync"]
                or node_status["pacemaker"]
                or node_status["cman"]
            ):
                utils.err(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node
                )
        except (ValueError, NameError):
            utils.err("unable to determine status of the node %s" % node)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        # The tarball is binary data, so read it in binary mode.
        with open(infile_name, "rb") as tarball_file:
            tarball_data = tarball_file.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def nodes_status(argv): if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"): all_nodes = utils.getNodesFromCorosyncConf() online_nodes = utils.getCorosyncActiveNodes() offline_nodes = [] for node in all_nodes: if node in online_nodes: next else: offline_nodes.append(node) online_nodes.sort() offline_nodes.sort() print "Corosync Nodes:" print " Online:", for node in online_nodes: print node, print "" print " Offline:", for node in offline_nodes: print node, print "" if argv[0] != "both": sys.exit(0) (output, retval) = utils.run(["/usr/sbin/crm_mon", "-1"]) if (retval != 0): print "Error running crm_mon, is pacemaker running?" sys.exit(1) print "Pacemaker Nodes:" onlinereg = re.compile(r"^Online: (.*)$",re.M) onlinematch = onlinereg.search(output) if onlinematch: onlinenodes = onlinematch.group(1).split(" ") onlinenodes.pop(0) onlinenodes.pop() onlinenodes.sort() print " Online:", for node in onlinenodes: print node, print "" offlinereg = re.compile(r"^OFFLINE: (.*)$", re.M) offlinematch = offlinereg.search(output) if offlinematch: offlinenodes = offlinematch.group(1).split(" ") offlinenodes.pop(0) offlinenodes.pop() offlinenodes.sort() print " Offline:", for node in offlinenodes: print node, print ""
def config_restore_remote(infile_name, infile_obj):
    """Restore a configuration backup tarball onto every node it lists.

    Args:
        infile_name: path of the backup tarball; passed to tarfile.open()
            and, when infile_obj is falsy, opened to read the raw data.
        infile_obj: optional open file object holding the tarball.  It is
            rewound with seek(0) and re-read after the archive is parsed.

    The node list is taken from cluster.conf (when utils.is_rhel6() is
    true) or corosync.conf inside the tarball.  Every failure is reported
    through utils.err().
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        try:
            # TarFile.next() returns None once the archive is exhausted.
            for member_info in iter(tarball.next, None):
                if member_info.name in extracted:
                    member = tarball.extractfile(member_info)
                    extracted[member_info.name] = member.read()
                    member.close()
        finally:
            # Release the archive even when reading a member fails.
            tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    conf_name = "cluster.conf" if utils.is_rhel6() else "corosync.conf"
    node_list = utils.getNodesFromCorosyncConf(extracted[conf_name])
    if not node_list:
        utils.err("no nodes found in the tarball")

    # Refuse to restore while any cluster daemon is running on any node.
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                utils.err("unable to determine status of the node %s" % node)
            node_status = json.loads(output)
            if (
                node_status["corosync"]
                or node_status["pacemaker"]
                or node_status["cman"]
            ):
                utils.err(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node
                )
        except (ValueError, NameError):
            utils.err("unable to determine status of the node %s" % node)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        # The tarball is binary data, so read it in binary mode.
        with open(infile_name, "rb") as tarball_file:
            tarball_data = tarball_file.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def cluster_gui_status(argv, dont_exit=False):
    """Check the given nodes, or all configured nodes when argv is empty.

    Args:
        argv: node names to check; when empty, the nodes from the corosync
            configuration are used (none found is reported via utils.err).
        dont_exit: when false, the process exits with status 1 if
            check_nodes() returns a truthy value.
    """
    if argv:
        targets = argv
    else:
        targets = utils.getNodesFromCorosyncConf()
        if len(targets) == 0:
            utils.err("no nodes found in corosync.conf")

    found_bad = check_nodes(targets, " ")
    if found_bad and not dont_exit:
        sys.exit(1)
def cluster_gui_status(argv, dont_exit=False):
    """Check the given nodes, or all configured nodes when argv is empty.

    Args:
        argv: node names to check; when empty, the nodes from the corosync
            configuration are used (none found is reported via utils.err).
        dont_exit: when false, the process exits with status 1 if
            check_nodes() returns a truthy value.
    """
    if argv:
        targets = argv
    else:
        targets = utils.getNodesFromCorosyncConf()
        if len(targets) == 0:
            utils.err("no nodes found in corosync.conf")

    found_bad = check_nodes(targets, " ")
    if found_bad and not dont_exit:
        sys.exit(1)
def cluster_uidgid_rhel6(argv, silent_list = False): if not os.path.isfile("/etc/cluster/cluster.conf"): utils.err("the /etc/cluster/cluster.conf file doesn't exist on this machine, create a cluster before running this command") if len(argv) == 0: found = False output, retval = utils.run(["/usr/sbin/ccs", "-f", "/etc/cluster/cluster.conf", "--lsmisc"]) if retval != 0: utils.err("error running ccs\n" + output) lines = output.split('\n') for line in lines: if line.startswith('UID/GID: '): print line found = True if not found and not silent_list: print "No uidgids configured in cluster.conf" return command = argv.pop(0) uid="" gid="" if (command == "add" or command == "rm") and len(argv) > 0: for arg in argv: if arg.find('=') == -1: utils.err("uidgid options must be of the form uid=<uid> gid=<gid>") (k,v) = arg.split('=',1) if k != "uid" and k != "gid": utils.err("%s is not a valid key, you must use uid or gid" %k) if k == "uid": uid = v if k == "gid": gid = v if uid == "" and gid == "": utils.err("you must set either uid or gid") if command == "add": output, retval = utils.run(["/usr/sbin/ccs", "-f", "/etc/cluster/cluster.conf", "--setuidgid", "uid="+uid, "gid="+gid]) if retval != 0: utils.err("unable to add uidgid\n" + output.rstrip()) elif command == "rm": output, retval = utils.run(["/usr/sbin/ccs", "-f", "/etc/cluster/cluster.conf", "--rmuidgid", "uid="+uid, "gid="+gid]) if retval != 0: utils.err("unable to remove uidgid\n" + output.rstrip()) # If we make a change, we sync out the changes to all nodes unless we're using -f if not utils.usefile: sync_nodes(utils.getNodesFromCorosyncConf(), utils.getCorosyncConf()) else: usage.cluster(["uidgid"]) exit(1)
def cluster_cmd(argv):
    """Dispatch a cluster sub-command.

    The first element of argv is removed and used as the sub-command name;
    the remaining elements are handed to the handlers that take arguments.
    An empty argv prints the usage text and exits with status 1; an unknown
    sub-command only prints the usage text.
    """
    if not argv:
        usage.cluster()
        exit(1)

    sub_cmd = argv.pop(0)
    # Lambdas keep every handler name lazily resolved, exactly like an
    # if/elif chain would.
    handlers = {
        "help": lambda: usage.cluster(),
        "configure": lambda: corosync_configure(argv),
        "sync": lambda: sync_nodes(
            utils.getNodesFromCorosyncConf(), utils.getCorosyncConf()),
        "gui-status": lambda: cluster_gui_status(argv),
        "auth": lambda: cluster_auth(argv),
        "token": lambda: cluster_token(argv),
        "start": lambda: start_cluster(argv),
        "stop": lambda: stop_cluster(argv),
        "enable": lambda: enable_cluster(),
        "disable": lambda: disable_cluster(),
        "startall": lambda: start_cluster_all(),
        "stopall": lambda: stop_cluster_all(),
        "enableall": lambda: enable_cluster_all(),
        "disableall": lambda: disable_cluster_all(),
        "cib": lambda: get_cib(),
        "push": lambda: cluster_push(argv),
        "node": lambda: cluster_node(argv),
        "localnode": lambda: cluster_localnode(argv),
        "get_conf": lambda: cluster_get_corosync_conf(argv),
    }
    handler = handlers.get(sub_cmd)
    if handler is None:
        usage.cluster()
    else:
        handler()
def pcsd_sync_certs(argv): nodes = utils.getNodesFromCorosyncConf() print ( "Synchronizing pcsd certificates on nodes {0}. pcsd needs to be " "restarted on the nodes in order to reload the certificates." ).format(", ".join(nodes)) print pcsd_data = {'nodes': nodes} for cmd in ['send_local_certs', 'pcsd_restart_nodes']: error = '' output, retval = utils.run_pcsdcli(cmd, pcsd_data) if retval == 0 and output['status'] == 'ok' and output['data']: try: if output['data']['status'] != 'ok' and output['data']['text']: error = output['data']['text'] except KeyError: error = 'Unable to communicate with pcsd' else: error = 'Unable to sync pcsd certificates' if error: utils.err(error, False)
def pcsd_sync_certs(argv):
    """Send the local pcsd certificates to all nodes and restart pcsd there.

    Runs the pcsd CLI commands 'send_local_certs' and 'pcsd_restart_nodes'
    in that order for the nodes from the corosync configuration.  A failure
    of either command is reported with utils.err(error, False) and the
    loop continues.  argv is not used.
    """
    nodes = utils.getNodesFromCorosyncConf()
    banner = (
        "Synchronizing pcsd certificates on nodes {0}. pcsd needs to be "
        "restarted on the nodes in order to reload the certificates."
    ).format(", ".join(nodes))
    print(banner)
    print()

    pcsd_data = {'nodes': nodes}
    for cmd in ('send_local_certs', 'pcsd_restart_nodes'):
        output, retval = utils.run_pcsdcli(cmd, pcsd_data)
        if retval != 0 or output['status'] != 'ok' or not output['data']:
            error = 'Unable to sync pcsd certificates'
        else:
            error = ''
            try:
                data = output['data']
                if data['status'] != 'ok' and data['text']:
                    error = data['text']
            except KeyError:
                error = 'Unable to communicate with pcsd'
        if error:
            utils.err(error, False)
def disable_cluster_all():
    """Disable the cluster on every node from the corosync configuration."""
    all_nodes = utils.getNodesFromCorosyncConf()
    disable_cluster_nodes(all_nodes)
def stop_cluster_all():
    """Stop the cluster on every node from the corosync configuration."""
    all_nodes = utils.getNodesFromCorosyncConf()
    stop_cluster_nodes(all_nodes)
def cluster_cmd(argv): if len(argv) == 0: usage.cluster() exit(1) sub_cmd = argv.pop(0) if (sub_cmd == "help"): usage.cluster(argv) elif (sub_cmd == "setup"): if "--name" in utils.pcs_options: corosync_setup([utils.pcs_options["--name"]] + argv) else: utils.err("A cluster name (--name <name>) is required to setup a cluster") elif (sub_cmd == "sync"): sync_nodes(utils.getNodesFromCorosyncConf(),utils.getCorosyncConf()) elif (sub_cmd == "status"): status.cluster_status(argv) print "" print "PCSD Status:" cluster_gui_status([],True) elif (sub_cmd == "pcsd-status"): cluster_gui_status(argv) elif (sub_cmd == "certkey"): cluster_certkey(argv) elif (sub_cmd == "auth"): cluster_auth(argv) elif (sub_cmd == "token"): cluster_token(argv) elif (sub_cmd == "start"): if "--all" in utils.pcs_options: start_cluster_all() else: start_cluster(argv) elif (sub_cmd == "stop"): if "--all" in utils.pcs_options: stop_cluster_all() else: stop_cluster(argv) elif (sub_cmd == "kill"): kill_cluster(argv) elif (sub_cmd == "standby"): node_standby(argv) elif (sub_cmd == "unstandby"): node_standby(argv, False) elif (sub_cmd == "enable"): if "--all" in utils.pcs_options: enable_cluster_all() else: enable_cluster(argv) elif (sub_cmd == "disable"): if "--all" in utils.pcs_options: disable_cluster_all() else: disable_cluster(argv) elif (sub_cmd == "remote-node"): cluster_remote_node(argv) elif (sub_cmd == "cib"): get_cib(argv) elif (sub_cmd == "cib-push"): cluster_push(argv) elif (sub_cmd == "cib-upgrade"): cluster_upgrade() elif (sub_cmd == "cib-revisions"): cluster_cib_revisions(argv) elif (sub_cmd == "cib-rollback"): cluster_cib_rollback(argv) elif (sub_cmd == "edit"): cluster_edit(argv) elif (sub_cmd == "node"): cluster_node(argv) elif (sub_cmd == "localnode"): cluster_localnode(argv) elif (sub_cmd == "uidgid"): cluster_uidgid(argv) elif (sub_cmd == "corosync"): cluster_get_corosync_conf(argv) elif (sub_cmd == "reload"): cluster_reload(argv) elif (sub_cmd == "destroy"): cluster_destroy(argv) elif 
(sub_cmd == "verify"): cluster_verify(argv) elif (sub_cmd == "report"): cluster_report(argv) else: usage.cluster() sys.exit(1)
def cluster_auth(argv):
    """Authenticate to the given nodes, or to all configured nodes.

    When argv is empty the node list comes from the corosync configuration.
    """
    targets = argv if len(argv) != 0 else utils.getNodesFromCorosyncConf()
    auth_nodes(targets)
def cluster_gui_status(argv):
    """Check the given nodes, or all configured nodes when argv is empty.

    When argv is empty the node list comes from the corosync configuration.
    """
    targets = argv if len(argv) != 0 else utils.getNodesFromCorosyncConf()
    check_nodes(targets)
def nodes_status(argv):
    """Print node listings selected by the single argument in argv.

    * ["pacemaker-id"] / ["corosync-id"]: "<id> <name>" per node.
    * ["config"]: node names from the corosync configuration and from
      pacemaker.
    * ["corosync"]: corosync online/offline lists, then exit status 0.
    * ["both"]: corosync lists followed by the pacemaker lists.
    * anything else: pacemaker online/standby/offline lists only.

    A cluster state without a "nodes" element is reported via utils.err().
    """
    def print_row(label, names):
        # Label and names share one line, each followed by a single space.
        print(label, end=' ')
        for name in names:
            print(name, end=' ')
        print("")

    selector = argv[0] if len(argv) == 1 else None

    if selector == "pacemaker-id":
        for node_id, node_name in utils.getPacemakerNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if selector == "corosync-id":
        for node_id, node_name in utils.getCorosyncNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if selector == "config":
        corosync_nodes = utils.getNodesFromCorosyncConf()
        pacemaker_nodes = utils.getNodesFromPacemaker()
        print("Corosync Nodes:")
        print_row("", [node.strip() for node in corosync_nodes])
        print("Pacemaker Nodes:")
        # Terminate this line too; previously the output ended without a
        # trailing newline.
        print_row("", [node.strip() for node in pacemaker_nodes])
        return

    if selector in ("corosync", "both"):
        all_nodes = utils.getNodesFromCorosyncConf()
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = [
            node for node in all_nodes if node not in online_nodes
        ]
        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print_row(" Online:", online_nodes)
        print_row(" Offline:", offline_nodes)
        if selector != "both":
            sys.exit(0)

    info_dom = utils.getClusterState()
    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    for node in nodes[0].getElementsByTagName("node"):
        name = node.getAttribute("name")
        if node.getAttribute("online") != "true":
            offlinenodes.append(name)
        elif node.getAttribute("standby") == "true":
            standbynodes.append(name)
        else:
            onlinenodes.append(name)

    print("Pacemaker Nodes:")
    print_row(" Online:", onlinenodes)
    print_row(" Standby:", standbynodes)
    print_row(" Offline:", offlinenodes)
def stop_cluster_all():
    """Stop the cluster on each configured node, one node after another."""
    all_nodes = utils.getNodesFromCorosyncConf()
    for target in all_nodes:
        utils.stopCluster(target)
def nodes_status(argv): if len(argv) == 1 and (argv[0] == "config"): corosync_nodes = utils.getNodesFromCorosyncConf() pacemaker_nodes = utils.getNodesFromPacemaker() print "Corosync Nodes:" print "", for node in corosync_nodes: print node.strip(), print "" print "Pacemaker Nodes:" print "", for node in pacemaker_nodes: print node.strip(), return if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"): all_nodes = utils.getNodesFromCorosyncConf() online_nodes = utils.getCorosyncActiveNodes() offline_nodes = [] for node in all_nodes: if node in online_nodes: next else: offline_nodes.append(node) online_nodes.sort() offline_nodes.sort() print "Corosync Nodes:" print " Online:", for node in online_nodes: print node, print "" print " Offline:", for node in offline_nodes: print node, print "" if argv[0] != "both": sys.exit(0) info_dom = utils.getClusterState() nodes = info_dom.getElementsByTagName("nodes") if nodes.length == 0: utils.err("No nodes section found") onlinenodes = [] offlinenodes = [] standbynodes = [] for node in nodes[0].getElementsByTagName("node"): if node.getAttribute("online") == "true": if node.getAttribute("standby") == "true": standbynodes.append(node.getAttribute("name")) else: onlinenodes.append(node.getAttribute("name")) else: offlinenodes.append(node.getAttribute("name")) print "Pacemaker Nodes:" print " Online:", for node in onlinenodes: print node, print "" print " Standby:", for node in standbynodes: print node, print "" print " Offline:", for node in offlinenodes: print node, print ""
def config_restore_remote(infile_name, infile_obj):
    """Restore a configuration backup tarball onto every node it lists.

    Args:
        infile_name: path of the backup tarball; passed to tarfile.open()
            and, when infile_obj is falsy, opened to read the raw data.
        infile_obj: optional open file object holding the tarball.  It is
            rewound with seek(0) and re-read after the archive is parsed.

    The node list is taken from cluster.conf (when utils.is_rhel6() is
    true) or corosync.conf inside the tarball.  Status problems on any
    node are collected, all reported, and then the process exits with
    status 1.  Other failures are reported through utils.err().
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        try:
            # Call TarFile.next() explicitly: the builtin next() needs
            # __next__, which TarFile does not define on Python 3.
            # next() returns None once the archive is exhausted.
            for member_info in iter(tarball.next, None):
                if member_info.name in extracted:
                    member = tarball.extractfile(member_info)
                    extracted[member_info.name] = member.read()
                    member.close()
        finally:
            # Release the archive even when reading a member fails.
            tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    conf_name = "cluster.conf" if utils.is_rhel6() else "corosync.conf"
    node_list = utils.getNodesFromCorosyncConf(extracted[conf_name])
    if not node_list:
        utils.err("no nodes found in the tarball")

    # Check every node before failing so all problems are shown at once.
    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            node_status = json.loads(output)
            if (
                node_status["corosync"]
                or node_status["pacemaker"]
                or node_status["cman"]
            ):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node
                )
                continue
        except (ValueError, NameError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or output.endswith("(HTTP error: 404)")):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        # The tarball is binary data; text mode would try to decode it on
        # Python 3.
        with open(infile_name, "rb") as tarball_file:
            tarball_data = tarball_file.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def nodes_status(argv): if len(argv) == 1 and (argv[0] == "config"): corosync_nodes = utils.getNodesFromCorosyncConf() pacemaker_nodes = utils.getNodesFromPacemaker() print "Corosync Nodes:" print "", for node in corosync_nodes: print node.strip(), print "" print "Pacemaker Nodes:" print "", for node in pacemaker_nodes: print node.strip(), return if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"): all_nodes = utils.getNodesFromCorosyncConf() online_nodes = utils.getCorosyncActiveNodes() offline_nodes = [] for node in all_nodes: if node in online_nodes: next else: offline_nodes.append(node) online_nodes.sort() offline_nodes.sort() print "Corosync Nodes:" print " Online:", for node in online_nodes: print node, print "" print " Offline:", for node in offline_nodes: print node, print "" if argv[0] != "both": sys.exit(0) info_dom = utils.getClusterState() nodes = info_dom.getElementsByTagName("nodes") if nodes.length == 0: print "Error: No nodes section found" sys.exit(1) onlinenodes = [] offlinenodes = [] standbynodes = [] for node in nodes[0].getElementsByTagName("node"): if node.getAttribute("online") == "true": if node.getAttribute("standby") == "true": standbynodes.append(node.getAttribute("name")) else: onlinenodes.append(node.getAttribute("name")) else: offlinenodes.append(node.getAttribute("name")) print "Pacemaker Nodes:" print " Online:", for node in onlinenodes: print node, print "" print " Standby:", for node in standbynodes: print node, print "" print " Offline:", for node in offlinenodes: print node, print ""
def nodes_status(argv): #print("rrr 02011 nodes_status") with open("/python.out", "a") as myfile: myfile.write("rrr 02011 nodes_status") if len(argv) == 1 and argv[0] == "pacemaker-id": for node_id, node_name in utils.getPacemakerNodesID().items(): print "{0} {1}".format(node_id, node_name) return if len(argv) == 1 and argv[0] == "corosync-id": for node_id, node_name in utils.getCorosyncNodesID().items(): print "{0} {1}".format(node_id, node_name) return if len(argv) == 1 and (argv[0] == "config"): corosync_nodes = utils.getNodesFromCorosyncConf() pacemaker_nodes = utils.getNodesFromPacemaker() print "Corosync Nodes:" print "", for node in corosync_nodes: print node.strip(), print "" print "Pacemaker Nodes:" print "", for node in pacemaker_nodes: print node.strip(), return if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"): all_nodes = utils.getNodesFromCorosyncConf() online_nodes = utils.getCorosyncActiveNodes() offline_nodes = [] for node in all_nodes: if node in online_nodes: next else: offline_nodes.append(node) online_nodes.sort() offline_nodes.sort() print "Corosync Nodes:" print " Online:", for node in online_nodes: print node, print "" print " Offline:", for node in offline_nodes: print node, print "" if argv[0] != "both": sys.exit(0) info_dom = utils.getClusterState() nodes = info_dom.getElementsByTagName("nodes") if nodes.length == 0: utils.err("No nodes section found") onlinenodes = [] offlinenodes = [] standbynodes = [] for node in nodes[0].getElementsByTagName("node"): if node.getAttribute("online") == "true": if node.getAttribute("standby") == "true": standbynodes.append(node.getAttribute("name")) else: onlinenodes.append(node.getAttribute("name")) else: offlinenodes.append(node.getAttribute("name")) print "Pacemaker Nodes:" print " Online:", for node in onlinenodes: print node, print "" print " Standby:", for node in standbynodes: print node, print "" print " Offline:", for node in offlinenodes: print node, print ""
def cluster_node(argv): if len(argv) != 2: usage.cluster(); sys.exit(1) if argv[0] == "add": add_node = True elif argv[0] in ["remove","delete"]: add_node = False else: usage.cluster(); sys.exit(1) node = argv[1] if "," in node: node0 = node.split(",")[0] node1 = node.split(",")[1] else: node0 = node node1 = None status,output = utils.checkAuthorization(node0) if status == 2: utils.err("pcsd is not running on %s" % node0) elif status == 3: utils.err( "%s is not yet authenticated (try pcs cluster auth %s)" % (node0, node0) ) if add_node == True: if node1 is None and utils.need_ring1_address(utils.getCorosyncConf()): utils.err( "cluster is configured for RRP, " "you have to specify ring 1 address for the node" ) elif ( node1 is not None and not utils.need_ring1_address(utils.getCorosyncConf()) ): utils.err( "cluster is not configured for RRP, " "you must not specify ring 1 address for the node" ) corosync_conf = None (canAdd, error) = utils.canAddNodeToCluster(node0) if not canAdd: utils.err("Unable to add '%s' to cluster: %s" % (node0, error)) for my_node in utils.getNodesFromCorosyncConf(): retval, output = utils.addLocalNode(my_node, node0, node1) if retval != 0: print >> sys.stderr, "Error: unable to add %s on %s - %s" % (node0, my_node, output.strip()) else: print "%s: Corosync updated" % my_node corosync_conf = output if corosync_conf != None: utils.setCorosyncConfig(node0, corosync_conf) if "--enable" in utils.pcs_options: utils.enableCluster(node0) if "--start" in utils.pcs_options: utils.startCluster(node0) else: utils.err("Unable to update any nodes") else: nodesRemoved = False c_nodes = utils.getNodesFromCorosyncConf() destroy_cluster([node0]) for my_node in c_nodes: if my_node == node0: continue retval, output = utils.removeLocalNode(my_node, node0) if retval != 0: print >> sys.stderr, "Error: unable to remove %s on %s - %s" % (node0,my_node,output.strip()) else: if output[0] == 0: print "%s: Corosync updated" % my_node nodesRemoved = True else: print >> 
sys.stderr, "%s: Error executing command occured: %s" % (my_node, "".join(output[1])) if nodesRemoved == False: utils.err("Unable to update any nodes") output, retval = utils.run(["crm_node", "--force", "-R", node0])
def disable_cluster_all():
    """Call ``utils.disableCluster`` for every node in the corosync config."""
    cluster_members = utils.getNodesFromCorosyncConf()
    for member in cluster_members:
        utils.disableCluster(member)
def nodes_status(argv):
    """Print the status of the cluster nodes.

    ``argv`` selects the report:

    * ``["pacemaker-id"]`` - one ``<id> <name>`` line per entry of
      ``utils.getPacemakerNodesID()``, then return.
    * ``["corosync-id"]`` - one ``<id> <name>`` line per entry of
      ``utils.getCorosyncNodesID()``, then return.
    * ``["config"]`` - names of the nodes known to corosync and pacemaker,
      then return.
    * ``["corosync"]`` - corosync nodes split into online/offline, then
      ``sys.exit(0)``.
    * ``["both"]`` - the corosync report followed by the pacemaker report.
    * anything else (including an empty list) - the pacemaker report only.

    The pacemaker report lists regular and remote nodes separately, each
    grouped into Online / Standby / Maintenance / Offline.
    """
    if len(argv) == 1 and argv[0] == "pacemaker-id":
        for node_id, node_name in utils.getPacemakerNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and argv[0] == "corosync-id":
        for node_id, node_name in utils.getCorosyncNodesID().items():
            print("{0} {1}".format(node_id, node_name))
        return

    if len(argv) == 1 and argv[0] == "config":
        corosync_nodes = utils.getNodesFromCorosyncConf()
        pacemaker_nodes = utils.getNodesFromPacemaker()
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))
        return

    if len(argv) == 1 and argv[0] in ("corosync", "both"):
        all_nodes = utils.getNodesFromCorosyncConf()
        online_nodes = utils.getCorosyncActiveNodes()
        # A configured node that corosync does not report as active is
        # offline.  (The original used a bare ``next`` expression here, which
        # is a no-op rather than ``continue``.)
        offline_nodes = [
            node for node in all_nodes if node not in online_nodes
        ]

        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        if argv[0] != "both":
            sys.exit(0)

    info_dom = utils.getClusterState()
    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        # NOTE(review): utils.err() presumably terminates the process here;
        # otherwise nodes[0] below would fail - confirm against utils.
        utils.err("No nodes section found")

    # Labels in the order they are printed; also used as bucket keys.
    labels = (" Online:", " Standby:", " Maintenance:", " Offline:")
    # dict() over a generator keeps this usable on Python 2.6.
    regular = dict((label, []) for label in labels)
    remote = dict((label, []) for label in labels)

    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        buckets = remote if node.getAttribute("type") == "remote" else regular
        # Standby takes precedence over maintenance, and both are only
        # reported for nodes that are online.
        if node.getAttribute("online") != "true":
            label = " Offline:"
        elif node.getAttribute("standby") == "true":
            label = " Standby:"
        elif node.getAttribute("maintenance") == "true":
            label = " Maintenance:"
        else:
            label = " Online:"
        buckets[label].append(node_name)

    for heading, buckets in (
        ("Pacemaker Nodes:", regular),
        ("Pacemaker Remote Nodes:", remote),
    ):
        print(heading)
        for label in labels:
            print(" ".join([label] + buckets[label]))
def config_restore_remote(infile_name, infile_obj):
    """Restore a configuration backup tarball on every node it describes.

    Args:
        infile_name: path of the tarball; opened directly when
            ``infile_obj`` is falsy.
        infile_obj: optional file object holding the tarball.  It is read as
            a stream first and then rewound with ``seek(0)`` to read the raw
            data that is sent to the nodes.

    The node list is taken from ``cluster.conf`` (when ``utils.is_rhel6()``)
    or ``corosync.conf`` inside the tarball.  The restore is refused, with
    exit status 1, if the status of any node cannot be determined or if
    corosync, pacemaker or cman is reported running on it.
    """
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
        "cluster.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        try:
            while True:
                # next(tarball) does not work in python2.6
                tar_member_info = tarball.next()
                if tar_member_info is None:
                    break
                if tar_member_info.name in extracted:
                    tar_member = tarball.extractfile(tar_member_info)
                    extracted[tar_member_info.name] = tar_member.read()
                    tar_member.close()
        finally:
            # Close the archive even when reading a member fails.
            tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    conf_name = "cluster.conf" if utils.is_rhel6() else "corosync.conf"
    node_list = utils.getNodesFromCorosyncConf(
        extracted[conf_name].decode("utf-8")
    )
    if not node_list:
        utils.err("no nodes found in the tarball")

    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            status = json.loads(output)
            if status["corosync"] or status["pacemaker"] or status["cman"]:
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node
                )
                continue
        except (ValueError, NameError, LookupError):
            # ValueError: the reply is not valid JSON.  LookupError: the
            # reply lacks one of the expected service keys; previously this
            # escaped as an unhandled KeyError.
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or output.endswith("(HTTP error: 404)")):
            utils.err(output)

    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def pcsd_sync_certs(argv, exit_after_error=True):
    """Send the local pcsd certificates to nodes, then restart pcsd on them.

    Args:
        argv: node names to sync; when empty, the nodes from
            ``utils.getNodesFromCorosyncConf()`` are used.
        exit_after_error: when true, a failure ends with ``sys.exit(1)``;
            when false, the function returns instead.  The flag is also
            passed on to ``utils.err`` for the fatal messages.

    pcsd is restarted only on the nodes whose certificate sync reported
    ``ok``.  A failure on any node during the sync is carried over and still
    counts as an error after the restart step.
    """
    failed = False
    sync_targets = argv if argv else utils.getNodesFromCorosyncConf()
    restart_targets = []

    print("Synchronizing pcsd certificates on nodes {0}...".format(
        ", ".join(sync_targets)
    ))
    output, retval = utils.run_pcsdcli(
        "send_local_certs", {"nodes": sync_targets}
    )
    if not (retval == 0 and output["status"] == "ok" and output["data"]):
        utils.err("Unable to sync pcsd certificates", exit_after_error)
        return
    try:
        sync_result = output["data"]
        if sync_result["node_status"]:
            for node, status in sync_result["node_status"].items():
                print("{0}: {1}".format(node, status["text"]))
                if status["status"] == "ok":
                    restart_targets.append(node)
                else:
                    failed = True
        if sync_result["status"] != "ok":
            failed = True
            utils.err(sync_result["text"], False)
        # Nothing was synced successfully, so there is nothing to restart.
        if failed and not restart_targets:
            if exit_after_error:
                sys.exit(1)
            return
        print()
    except (KeyError, AttributeError):
        utils.err("Unable to communicate with pcsd", exit_after_error)
        return

    print("Restarting pcsd on the nodes in order to reload the certificates...")
    output, retval = utils.run_pcsdcli(
        "pcsd_restart_nodes", {"nodes": restart_targets}
    )
    if not (retval == 0 and output["status"] == "ok" and output["data"]):
        utils.err("Unable to restart pcsd", exit_after_error)
        return
    try:
        restart_result = output["data"]
        if restart_result["node_status"]:
            for node, status in restart_result["node_status"].items():
                print("{0}: {1}".format(node, status["text"]))
                if status["status"] != "ok":
                    failed = True
        if restart_result["status"] != "ok":
            failed = True
            utils.err(restart_result["text"], False)
        if failed:
            if exit_after_error:
                sys.exit(1)
            return
    except (KeyError, AttributeError):
        utils.err("Unable to communicate with pcsd", exit_after_error)
        return