def MigrateNode(opts, args):
    """Migrate all primary instance on a node.

    @param opts: the command line options selected by the user; this
        function reads C{force}, C{live}, C{migration_mode},
        C{iallocator}, C{dst_node}, C{allow_runtime_chgs} and
        C{ignore_ipolicy}
    @type args: list
    @param args: node name(s) passed to the node query; the query must
        yield exactly one node and C{args[0]} is the node submitted
    @rtype: int
    @return: 0 when the node has no primary instances,
        C{constants.EXIT_CONFIRMATION} when the user declines,
        C{constants.EXIT_SUCCESS} when no job failed and
        C{constants.EXIT_FAILURE} otherwise
    @raise errors.OpPrereqError: if both --non-live and --migration-mode
        were passed

    """
    cl = GetClient()
    force = opts.force
    selected_fields = ["name", "pinst_list"]

    qcl = GetClient()
    try:
        result = qcl.QueryNodes(names=args, fields=selected_fields,
                                use_locking=False)
    finally:
        # Release the query client even when the query itself fails
        qcl.Close()

    # Exactly one result row is expected; anything else fails the unpacking
    ((node, pinst), ) = result

    if not pinst:
        ToStdout("No primary instances on node %s, exiting." % node)
        return 0

    # Sorted once here; the prompt below reuses the sorted list
    pinst = utils.NiceSort(pinst)

    if not (force or
            AskUser("Migrate instance(s) %s?" % utils.CommaJoin(pinst))):
        return constants.EXIT_CONFIRMATION

    # this should be removed once --non-live is deprecated
    if not opts.live and opts.migration_mode is not None:
        raise errors.OpPrereqError("Only one of the --non-live and "
                                   "--migration-mode options can be passed",
                                   errors.ECODE_INVAL)
    if not opts.live:
        # --non-live passed
        mode = constants.HT_MIGRATION_NONLIVE
    else:
        mode = opts.migration_mode

    op = opcodes.OpNodeMigrate(node_name=args[0], mode=mode,
                               iallocator=opts.iallocator,
                               target_node=opts.dst_node,
                               allow_runtime_changes=opts.allow_runtime_chgs,
                               ignore_ipolicy=opts.ignore_ipolicy)

    result = SubmitOrSend(op, opts, cl=cl)

    # Keep track of submitted jobs
    jex = JobExecutor(cl=cl, opts=opts)
    for (status, job_id) in result[constants.JOB_IDS_KEY]:
        jex.AddJobId(None, status, job_id)

    results = jex.GetResults()
    # The first element of every result row is its success flag
    bad_cnt = len([row for row in results if not row[0]])
    if bad_cnt == 0:
        ToStdout("All instances migrated successfully.")
        rcode = constants.EXIT_SUCCESS
    else:
        ToStdout("There were %s errors during the node migration.", bad_cnt)
        rcode = constants.EXIT_FAILURE

    return rcode
def _FormatNodeInfo(node_info):
    """Format node information for L{cli.PrintGenericInfo()}.

    @param node_info: sequence of exactly thirteen values, unpacked in
        this order: name, primary ip, secondary ip, primary instances,
        secondary instances, master candidate flag, drained flag, offline
        flag, master_capable, vm_capable, powered, node parameters and
        custom node parameters
    @rtype: list
    @return: list of (label, value) pairs

    """
    (name, primary_ip, secondary_ip, pinst, sinst, is_mc, drained, offline,
     master_capable, vm_capable, powered, ndparams,
     ndparams_custom) = node_info

    rows = [
        ("Node name", name),
        ("primary ip", primary_ip),
        ("secondary ip", secondary_ip),
        ("master candidate", is_mc),
        ("drained", drained),
        ("offline", offline),
    ]

    # The power state is only listed when it is known
    if powered is not None:
        rows.append(("powered", powered))

    rows.append(("master_capable", master_capable))
    rows.append(("vm_capable", vm_capable))

    # Instance lists are only shown for nodes flagged as vm_capable
    if vm_capable:
        primaries = list(utils.NiceSort(pinst))
        secondaries = list(utils.NiceSort(sinst))
        rows.append(("primary for instances", primaries))
        rows.append(("secondary for instances", secondaries))

    params_info = FormatParamsDictInfo(ndparams_custom, ndparams)
    rows.append(("node parameters", params_info))
    return rows
def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned node group locks still match the groups of
    the affected nodes, loads node, group and instance data from the
    configuration, and checks whether the assignment splits instances
    across groups.

    @raise errors.OpExecError: if the nodes changed groups since the locks
        were acquired, if the target group cannot be retrieved, or if
        instances would be newly split and C{self.op.force} is not set

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.node_uuids))

    # Groups that should be locked: the target group plus the groups the
    # nodes belong to according to the current configuration
    target_group = set([self.group_uuid])
    expected_locks = (
        target_group | self.cfg.GetNodeGroupsFromNodes(self.op.node_uuids))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
        current_txt = utils.CommaJoin(expected_locks)
        previous_txt = utils.CommaJoin(actual_locks)
        raise errors.OpExecError("Nodes changed groups since locks were"
                                 " acquired, current groups are '%s', used"
                                 " to be '%s'" % (current_txt, previous_txt))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
        raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                                 (self.op.group_name, self.group_uuid))

    # Every affected node is paired with the target group
    assignments = [(uuid, self.group_uuid) for uuid in self.op.node_uuids]
    (new_splits, previous_splits) = self.CheckAssignmentForSplitInstances(
        assignments, self.node_data, instance_data)

    if not new_splits:
        return

    fmt_new_splits = utils.CommaJoin(
        utils.NiceSort(self.cfg.GetInstanceNames(new_splits)))

    if self.op.force:
        self.LogWarning("This operation will split the following"
                        " instances: %s", fmt_new_splits)
    else:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)

    # Already-split instances are only reported next to new splits
    if previous_splits:
        fmt_previous_splits = utils.CommaJoin(
            utils.NiceSort(self.cfg.GetInstanceNames(previous_splits)))
        self.LogWarning("In addition, these already-split instances continue"
                        " to be split across groups: %s",
                        fmt_previous_splits)
def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    @param lu: object whose C{owned_locks(lock_level)} supplies the names
        when C{self.do_locking} is set
    @param all_names: names used when no locking is done
    @param lock_level: lock level passed to C{lu.owned_locks}
    @return: the sorted available names if all items were requested,
        otherwise C{self.wanted} unchanged
    @raise errors.OpExecError: if a requested name is no longer available

    """
    names = lu.owned_locks(lock_level) if self.do_locking else all_names

    if self.wanted == locking.ALL_SET:
        assert not self.names
        # caller didn't specify names, so ordering is not important
        return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names

    vanished = set(self.wanted).difference(names)
    if vanished:
        raise errors.OpExecError("Some items were removed before retrieving"
                                 " their data: %s" % vanished)

    # Return expanded names
    return self.wanted
def _VerifyInstanceLvs(self, node_errors, offline_disk_instance_names,
                       missing_disks):
    """Check the logical volumes of instances with active disks.

    The three arguments are filled in place; nothing is returned.

    @param node_errors: mapping updated with C{node_uuid -> error message}
        for every node whose LV listing failed
    @param offline_disk_instance_names: collection (must support C{add})
        receiving the names of instances that have an LV reported as not
        online
    @param missing_disks: mapping updated with C{instance name -> list of
        [node_uuid, lv_name]} for every expected LV no node reported

    """
    node_lv_to_inst = MapInstanceLvsToNodes(
        self.cfg,
        [inst for inst in self.instances.values() if inst.disks_active])

    if not node_lv_to_inst:
        # No expected LVs, hence nothing to query or report
        return

    node_uuids = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                                set(self.cfg.GetVmCapableNodeList()))

    node_lvs = self.rpc.call_lv_list(node_uuids, [])

    for (node_uuid, node_res) in node_lvs.items():
        if node_res.offline:
            continue

        msg = node_res.fail_msg
        if msg:
            logging.warning("Error enumerating LVs on node %s: %s",
                            self.cfg.GetNodeName(node_uuid), msg)
            node_errors[node_uuid] = msg
            continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
            # Every LV found is removed from the expectation map, so that
            # only the missing ones are left once all nodes were processed
            inst = node_lv_to_inst.pop((node_uuid, lv_name), None)
            if not lv_online and inst is not None:
                offline_disk_instance_names.add(inst.name)

    # any leftover items in node_lv_to_inst are missing LVs, let's arrange
    # the data better
    for key, inst in node_lv_to_inst.items():
        missing_disks.setdefault(inst.name, []).append(list(key))
def CheckPrereq(self):
    """Check prerequisites.

    This checks that the group is empty (i.e., contains no nodes) and
    that it is not the only node group of the cluster.

    @raise errors.OpPrereqError: if the group still has nodes or is the
        only group

    """
    # Verify that the group is empty.
    member_uuids = []
    for node in self.cfg.GetAllNodesInfo().values():
        if node.group == self.group_uuid:
            member_uuids.append(node.uuid)

    if member_uuids:
        raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                   " nodes: %s" %
                                   (self.op.group_name,
                                    utils.CommaJoin(
                                        utils.NiceSort(member_uuids))),
                                   errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
        raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                   " removed" % self.op.group_name,
                                   errors.ECODE_STATE)
def _OutputPerNodeOSStatus(msg_map):
    """Print the status message of every node, sorted by node name.

    After each node line, the messages stored for that node in
    C{nodes_hidden} are printed as well.

    NOTE(review): C{nodes_hidden} is neither a parameter nor a local; it
    must be provided by an enclosing or module scope -- confirm.

    @type msg_map: dict
    @param msg_map: maps node names to their status message

    """
    for node_name in utils.NiceSort(msg_map.keys()):
        node_status = msg_map[node_name]
        ToStdout(" Node: %s, status: %s", node_name, node_status)
        for hidden_msg in nodes_hidden[node_name]:
            ToStdout(hidden_msg)
def ShowExtStorageInfo(opts, args):
    """List detailed information about ExtStorage providers.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: empty list or list of ExtStorage providers' names; the
        list is not modified
    @rtype: int
    @return: the desired exit code (1 if no provider data was returned,
        0 otherwise)

    """
    op = opcodes.OpExtStorageDiagnose(output_fields=["name",
                                                     "nodegroup_status",
                                                     "parameters"],
                                      names=[])

    result = SubmitOpCode(op, opts=opts)

    if not result:
        ToStderr("Can't get the ExtStorage providers list")
        return 1

    # Work on a private copy so the caller's argument list stays untouched;
    # names are removed from it as the matching providers are displayed
    remaining = list(args)
    do_filter = bool(remaining)

    for (name, nodegroup_data, parameters) in result:
        if do_filter:
            if name not in remaining:
                continue
            remaining.remove(name)

        nodegroups_valid = [ng_name
                            for (ng_name, ng_status) in nodegroup_data.items()
                            if ng_status]

        ToStdout("%s:", name)

        if nodegroups_valid:
            ToStdout(" - Valid for nodegroups:")
            for ndgrp in utils.NiceSort(nodegroups_valid):
                ToStdout(" %s", ndgrp)
            ToStdout(" - Supported parameters:")
            for pname, pdesc in parameters:
                ToStdout(" %s: %s", pname, pdesc)
        else:
            ToStdout(" - Invalid for all nodegroups")

        ToStdout("")

    # Whatever was requested but never matched a provider is reported
    for name in remaining:
        ToStdout("%s: Not Found", name)
        ToStdout("")

    return 0
def FailoverNode(opts, args):
    """Failover all primary instance on a node.

    @param opts: the command line options selected by the user; this
        function reads C{force}, C{ignore_consistency} and C{iallocator}
    @type args: list
    @param args: node name(s) passed to the node query; only the first
        result row is used
    @rtype: int
    @return: the desired exit code: 0 if there was nothing to do or all
        failovers succeeded, 2 if the user declined the confirmation and
        1 if at least one failover failed

    """
    cl = GetClient()
    force = opts.force
    selected_fields = ["name", "pinst_list"]

    # these fields are static data anyway, so the query is done without
    # locking
    qcl = GetClient()
    try:
        result = qcl.QueryNodes(names=args, fields=selected_fields,
                                use_locking=False)
    finally:
        # Release the query client even when the query itself fails
        qcl.Close()

    node, pinst = result[0]
    if not pinst:
        ToStderr("No primary instances on node %s, exiting.", node)
        return 0

    pinst = utils.NiceSort(pinst)

    if not force and not AskUser("Fail over instance(s) %s?" %
                                 (",".join("'%s'" % name for name in pinst))):
        return 2

    jex = JobExecutor(cl=cl, opts=opts)
    for iname in pinst:
        op = opcodes.OpInstanceFailover(
            instance_name=iname,
            ignore_consistency=opts.ignore_consistency,
            iallocator=opts.iallocator)
        jex.QueueJob(iname, op)

    results = jex.GetResults()
    # The first element of every result row is its success flag
    bad_cnt = len([row for row in results if not row[0]])
    if bad_cnt == 0:
        ToStdout("All %d instance(s) failed over successfully.", len(results))
        retcode = 0
    else:
        ToStdout("There were errors during the failover:\n"
                 "%d error(s) out of %d instance(s).", bad_cnt, len(results))
        # Failed jobs must be visible in the exit code
        retcode = 1

    return retcode
def _ComputeWrongFileStoragePaths(paths,
                                  _forbidden=_GetForbiddenFileStoragePaths()):
    """Cross-checks a list of paths for prefixes considered bad.

    Some paths, e.g. "/bin", should not be used for file storage. A path
    is reported when, after normalization, it is not absolute, is itself
    a forbidden path, or has a forbidden path below it.

    @type paths: list
    @param paths: List of paths to be checked
    @param _forbidden: collection of forbidden paths; the default is
        computed once, when the function is defined
    @rtype: list
    @return: Sorted list of paths for which the user should be warned

    """
    def _Check(path):
        # any() yields a real boolean; the truth value of a bare filter()
        # result is always true where filter() returns an iterator
        return (not os.path.isabs(path) or
                path in _forbidden or
                any(utils.IsBelowDir(path, forbidden_path) is None and False
                    or utils.IsBelowDir(forbidden_path, path)
                    for forbidden_path in ()) or
                any(utils.IsBelowDir(forbidden_path, path)
                    for forbidden_path in _forbidden))

    normalized = [os.path.normpath(path) for path in paths]
    return utils.NiceSort([path for path in normalized if _Check(path)])
def GenericQueryFieldsTest(cmd, fields):
    """Run checks on the "list-fields" sub-command of a query command.

    @param cmd: Command name
    @type fields: list
    @param fields: List of field names the command is expected to list

    """
    master = qa_config.GetMasterNode()

    # Listing fields
    AssertRedirectedCommand([cmd, "list-fields"])
    AssertRedirectedCommand([cmd, "list-fields"] + fields)

    # Check listed fields (all, must be sorted)
    listing_cmd = [cmd, "list-fields", "--separator=|", "--no-headers"]
    raw_output = GetCommandOutput(master.primary,
                                  utils.ShellQuoteArgs(listing_cmd))
    listed_names = []
    for line in raw_output.splitlines():
        # The field name is the first column of every output line
        listed_names.append(line.split("|", 1)[0])
    AssertEqual(listed_names, utils.NiceSort(fields))

    # Check exit code for listing unknown field
    (rcode, _, _) = AssertCommand(
        [cmd, "list-fields", "field/does/not/exist"], fail=True)
    AssertEqual(rcode, constants.EXIT_UNKNOWN_FIELD)
def CheckPrereq(self):
    """Check prerequisites.

    This checks that the network is not connected to any node group.

    @raise errors.OpPrereqError: if at least one node group still lists
        the network among its networks

    """
    # Verify that the network is not connected.
    connected_groups = []
    for group in self.cfg.GetAllNodeGroupsInfo().values():
        if self.network_uuid in group.networks:
            connected_groups.append(group.name)

    if not connected_groups:
        return

    # The warning names the groups, the error itself stays generic
    self.LogWarning("Network '%s' is connected to the following"
                    " node groups: %s" %
                    (self.op.network_name,
                     utils.CommaJoin(utils.NiceSort(connected_groups))))
    raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
def GenericQueryTest(cmd, fields, namefield="name", test_unknown=True):
    """Runs a number of tests on query commands.

    @param cmd: Command name
    @param fields: List of field names
    @param namefield: field used to list item names; the name ordering
        checks are skipped when it is C{None}
    @param test_unknown: whether to check that listing an unknown item
        fails

    """
    # Seeded from the command name
    rnd = random.Random(hash(cmd))

    shuffled_fields = list(fields)
    rnd.shuffle(shuffled_fields)

    # Test a number of field combinations
    for testfields in _SelectQueryFields(rnd, shuffled_fields):
        output_arg = ",".join(testfields)
        AssertRedirectedCommand([cmd, "list", "--output", output_arg])

    if namefield is not None:
        namelist_fn = compat.partial(_List, cmd, [namefield])

        # When no names were requested, the list must be sorted
        names = namelist_fn(None)
        AssertEqual(names, utils.NiceSort(names))

        # When requesting specific names, the order must be kept
        revnames = list(reversed(names))
        AssertEqual(namelist_fn(revnames), revnames)

        randnames = list(names)
        rnd.shuffle(randnames)
        AssertEqual(namelist_fn(randnames), randnames)

    if test_unknown:
        # Listing unknown items must fail
        unknown_item_cmd = [cmd, "list", "this.name.certainly.does.not.exist"]
        AssertCommand(unknown_item_cmd, fail=True)

    # Check exit code for listing unknown field
    (rcode, _, _) = AssertRedirectedCommand(
        [cmd, "list", "--output=field/does/not/exist"], fail=True)
    AssertEqual(rcode, constants.EXIT_UNKNOWN_FIELD)
def _OutputPerNodeStatus(msg_map):
    """Print the status message of every node, sorted by node name.

    @type msg_map: dict
    @param msg_map: maps node names to their status message

    """
    for node_name in utils.NiceSort(msg_map):
        node_status = msg_map[node_name]
        ToStdout(" Node: %s, status: %s", node_name, node_status)
def _OutputPerNodegroupStatus(msg_map):
    """Print the status message of every node group, sorted by group name.

    @type msg_map: dict
    @param msg_map: maps node group names to their status message

    """
    for nodegroup in utils.NiceSort(msg_map.keys()):
        group_status = msg_map[nodegroup]
        ToStdout(" For nodegroup: %s --> %s", nodegroup, group_status)
def EvacuateNode(opts, args):
    """Relocate all secondary instance from a node.

    @param opts: the command line options selected by the user; when
        C{dst_node} is given, C{secondary_only} and C{primary_only} are
        overwritten on this object
    @type args: list
    @param args: node name(s) passed to the node query; C{args[0]} is the
        node submitted for evacuation
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if both --primary-only and
        --secondary-only were passed

    """
    if opts.dst_node is not None:
        ToStderr("New secondary node given (disabling iallocator), hence"
                 " evacuating secondary instances only.")
        opts.secondary_only = True
        opts.primary_only = False

    if opts.secondary_only and opts.primary_only:
        raise errors.OpPrereqError("Only one of the --primary-only and"
                                   " --secondary-only options can be passed",
                                   errors.ECODE_INVAL)
    elif opts.primary_only:
        mode = constants.NODE_EVAC_PRI
    elif opts.secondary_only:
        mode = constants.NODE_EVAC_SEC
    else:
        mode = constants.NODE_EVAC_ALL

    # Determine affected instances
    fields = []

    if not opts.secondary_only:
        fields.append("pinst_list")
    if not opts.primary_only:
        fields.append("sinst_list")

    cl = GetClient()

    qcl = GetClient()
    try:
        result = qcl.QueryNodes(names=args, fields=fields, use_locking=False)
    finally:
        # Release the query client even when the query itself fails
        qcl.Close()

    # Every result row holds one instance list per requested field;
    # flatten rows and lists into a single set of instance names
    instances = set(name
                    for row in result
                    for inst_list in row
                    for name in inst_list)

    if not instances:
        # No instances to evacuate
        ToStderr("No instances to evacuate on node(s) %s, exiting.",
                 utils.CommaJoin(args))
        return constants.EXIT_SUCCESS

    if not (opts.force or
            AskUser("Relocate instance(s) %s from node(s) %s?" %
                    (utils.CommaJoin(utils.NiceSort(instances)),
                     utils.CommaJoin(args)))):
        return constants.EXIT_CONFIRMATION

    # Evacuate node
    op = opcodes.OpNodeEvacuate(node_name=args[0], mode=mode,
                                remote_node=opts.dst_node,
                                iallocator=opts.iallocator,
                                early_release=opts.early_release,
                                ignore_soft_errors=opts.ignore_soft_errors)
    result = SubmitOrSend(op, opts, cl=cl)

    # Keep track of submitted jobs
    jex = JobExecutor(cl=cl, opts=opts)

    for (status, job_id) in result[constants.JOB_IDS_KEY]:
        jex.AddJobId(None, status, job_id)

    results = jex.GetResults()
    # The first element of every result row is its success flag
    bad_cnt = len([row for row in results if not row[0]])
    if bad_cnt == 0:
        ToStdout("All instances evacuated successfully.")
        rcode = constants.EXIT_SUCCESS
    else:
        ToStdout("There were %s errors during the evacuation.", bad_cnt)
        rcode = constants.EXIT_FAILURE

    return rcode
def ShowJobs(opts, args):
    """Show detailed information about jobs.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain the job IDs to be queried
    @rtype: int
    @return: the desired exit code
    @raise errors.ProgrammerError: if a job reports a status that is not
        listed in C{_USER_JOB_STATUS}

    """
    def format_msg(level, text):
        """Display the text indented."""
        ToStdout("%s%s", " " * level, text)

    def result_helper(value):
        """Format a result field in a nice way."""
        if isinstance(value, (tuple, list)):
            return "[%s]" % utils.CommaJoin(value)
        return str(value)

    def ts_delta(later, earlier):
        """Return the seconds between two (seconds, microseconds) pairs."""
        return later[0] - earlier[0] + (later[1] - earlier[1]) / 1000000.0

    def format_op_timestamp(timestamp, label, missing_text):
        """Display an opcode timestamp, or a note if it is not available."""
        if isinstance(timestamp, (tuple, list)):
            format_msg(3, "%s: %s" % (label, FormatTimestamp(timestamp)))
        else:
            format_msg(3, missing_text)

    selected_fields = [
        "id", "status", "ops", "opresult", "opstatus", "oplog",
        "opstart", "opexec", "opend", "received_ts", "start_ts", "end_ts",
    ]

    qfilter = qlang.MakeSimpleFilter("id", _ParseJobIds(args))

    cl = GetClient(query=True)
    jobs = cl.Query(constants.QR_JOB, selected_fields, qfilter).data

    first = True

    for entry in jobs:
        # Jobs are separated by an empty line
        if not first:
            format_msg(0, "")
        else:
            first = False

        # Every field arrives as a (result status, value) pair, in the
        # order of selected_fields
        ((_, job_id), (rs_status, status), (_, ops), (_, opresult),
         (_, opstatus), (_, oplog), (_, opstart), (_, opexec), (_, opend),
         (_, recv_ts), (_, start_ts), (_, end_ts)) = entry

        # Detect non-normal results
        if rs_status != constants.RS_NORMAL:
            format_msg(0, "Job ID %s not found" % job_id)
            continue

        format_msg(0, "Job ID: %s" % job_id)
        if status in _USER_JOB_STATUS:
            status = _USER_JOB_STATUS[status]
        else:
            raise errors.ProgrammerError("Unknown job status code '%s'" %
                                         status)

        format_msg(1, "Status: %s" % status)

        if recv_ts is not None:
            format_msg(1, "Received: %s" % FormatTimestamp(recv_ts))
        else:
            format_msg(1, "Missing received timestamp (%s)" % str(recv_ts))

        if start_ts is not None:
            if recv_ts is not None:
                delta = " (delta %.6fs)" % ts_delta(start_ts, recv_ts)
            else:
                delta = ""
            format_msg(1, "Processing start: %s%s" %
                       (FormatTimestamp(start_ts), delta))
        else:
            format_msg(1, "Processing start: unknown (%s)" % str(start_ts))

        if end_ts is not None:
            if start_ts is not None:
                delta = " (delta %.6fs)" % ts_delta(end_ts, start_ts)
            else:
                delta = ""
            format_msg(1, "Processing end: %s%s" %
                       (FormatTimestamp(end_ts), delta))
        else:
            format_msg(1, "Processing end: unknown (%s)" % str(end_ts))

        if end_ts is not None and recv_ts is not None:
            format_msg(1, "Total processing time: %.6f seconds" %
                       ts_delta(end_ts, recv_ts))
        else:
            format_msg(1, "Total processing time: N/A")

        format_msg(1, "Opcodes:")
        # Per-opcode names are distinct from the job-level ones above, so
        # the job list and job status are not rebound inside this loop
        for (opcode, op_result, op_status, log, s_ts, x_ts, e_ts) in \
                zip(ops, opresult, opstatus, oplog, opstart, opexec, opend):
            format_msg(2, "%s" % opcode["OP_ID"])
            format_msg(3, "Status: %s" % op_status)

            format_op_timestamp(s_ts, "Processing start",
                                "No processing start time")
            format_op_timestamp(x_ts, "Execution start",
                                "No execution start time")
            format_op_timestamp(e_ts, "Processing end",
                                "No processing end time")

            format_msg(3, "Input fields:")
            for key in utils.NiceSort(opcode.keys()):
                if key == "OP_ID":
                    continue
                val = opcode[key]
                if isinstance(val, (tuple, list)):
                    val = ",".join([str(item) for item in val])
                format_msg(4, "%s: %s" % (key, val))

            if op_result is None:
                format_msg(3, "No output data")
            elif isinstance(op_result, (tuple, list)):
                if not op_result:
                    format_msg(3, "Result: empty sequence")
                else:
                    format_msg(3, "Result:")
                    for elem in op_result:
                        format_msg(4, result_helper(elem))
            elif isinstance(op_result, dict):
                if not op_result:
                    format_msg(3, "Result: empty dictionary")
                else:
                    format_msg(3, "Result:")
                    for key, val in op_result.items():
                        format_msg(4, "%s: %s" % (key, result_helper(val)))
            else:
                format_msg(3, "Result: %s" % op_result)

            format_msg(3, "Execution log:")
            for serial, log_ts, log_type, log_msg in log:
                time_txt = FormatTimestamp(log_ts)
                encoded = FormatLogMessage(log_type, log_msg)
                format_msg(4, "%s:%s:%s %s" %
                           (serial, time_txt, log_type, encoded))

    return 0
def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    The nodes are processed sorted by name. For the power-on and
    power-off commands the node's C{powered} attribute is changed and the
    node is written back through C{self.cfg.Update}.

    @param feedback_fn: passed on to C{self.cfg.Update}
    @rtype: list
    @return: one entry per node; every entry is a list starting with
        C{(constants.RS_NORMAL, node name)}, followed by one
        C{(result status, payload or None)} tuple

    """
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
        node_entry = [(constants.RS_NORMAL, node.name)]
        ret.append(node_entry)

        oob_program = SupportsOob(self.cfg, node)

        if not oob_program:
            node_entry.append((constants.RS_UNAVAIL, None))
            continue

        logging.info("Executing out-of-band command '%s' using '%s' on %s",
                     self.op.command, oob_program, node.name)
        result = self.rpc.call_run_oob(self.master_node_uuid, oob_program,
                                       self.op.command, node.name,
                                       self.op.timeout)

        if result.fail_msg:
            self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                            node.name, result.fail_msg)
            node_entry.append((constants.RS_NODATA, None))
            continue

        try:
            self._CheckPayload(result)
        except errors.OpExecError as err:
            self.LogWarning("Payload returned by node '%s' is not valid: %s",
                            node.name, err)
            node_entry.append((constants.RS_NODATA, None))
        else:
            if self.op.command == constants.OOB_HEALTH:
                # For health we should log important events
                for item, status in result.payload:
                    if status in [constants.OOB_STATUS_WARNING,
                                  constants.OOB_STATUS_CRITICAL]:
                        self.LogWarning(
                            "Item '%s' on node '%s' has status '%s'",
                            item, node.name, status)

            if self.op.command == constants.OOB_POWER_ON:
                node.powered = True
            elif self.op.command == constants.OOB_POWER_OFF:
                node.powered = False
            elif self.op.command == constants.OOB_POWER_STATUS:
                powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
                if powered != node.powered:
                    logging.warning(("Recorded power state (%s) of node '%s'"
                                     " does not match actual power state"
                                     " (%s)"),
                                    node.powered, node.name, powered)

            # For configuration changing commands we should update the node
            if self.op.command in (constants.OOB_POWER_ON,
                                   constants.OOB_POWER_OFF):
                self.cfg.Update(node, feedback_fn)

            node_entry.append((constants.RS_NORMAL, result.payload))

            # Wait between power-on commands, but not after the last node
            if (self.op.command == constants.OOB_POWER_ON and
                    idx < len(self.nodes) - 1):
                time.sleep(self.op.power_delay)

    return ret
def DiagnoseOS(opts, args):
    """Analyse all OSes on this cluster.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code: 1 if the OS list could not be
        retrieved or at least one OS is not valid on all nodes, 0
        otherwise

    """
    op = opcodes.OpOsDiagnose(output_fields=["name", "valid", "variants",
                                             "node_status", "hidden",
                                             "blacklisted"],
                              names=[])
    result = SubmitOpCode(op, opts=opts)

    if result is None:
        ToStderr("Can't get the OS list")
        return 1

    has_bad = False

    for os_name, _, os_variants, node_data, hid, blk in result:
        nodes_valid = {}
        nodes_bad = {}
        nodes_hidden = {}
        for node_name, node_info in node_data.items():
            nodes_hidden[node_name] = []
            if node_info:  # at least one entry in the per-node list
                # The first entry decides the node's status; the remaining
                # ones are reported as hidden further down
                (fo_path, fo_status, fo_msg, fo_variants,
                 fo_params, fo_api, fo_trusted) = node_info.pop(0)
                fo_msg = "%s (path: %s)" % (_OsStatus(fo_status, fo_msg),
                                            fo_path)
                if fo_api:
                    max_os_api = max(fo_api)
                    fo_msg += " [API versions: %s]" % utils.CommaJoin(fo_api)
                else:
                    max_os_api = 0
                    fo_msg += " [no API versions declared]"

                if max_os_api >= constants.OS_API_V15:
                    if fo_variants:
                        fo_msg += (" [variants: %s]" %
                                   utils.CommaJoin(fo_variants))
                    else:
                        fo_msg += " [no variants]"

                if max_os_api >= constants.OS_API_V20:
                    if fo_params:
                        fo_msg += (" [parameters: %s]" %
                                   utils.CommaJoin([v[0] for v in fo_params]))
                    else:
                        fo_msg += " [no parameters]"

                if fo_trusted:
                    fo_msg += " [trusted]"
                else:
                    fo_msg += " [untrusted]"

                if fo_status:
                    nodes_valid[node_name] = fo_msg
                else:
                    nodes_bad[node_name] = fo_msg

                for hidden_entry in node_info:
                    # Only the first three fields are needed; slicing works
                    # for any entry length, whereas the first entry of this
                    # list was unpacked into seven fields
                    hpath, hstatus, hmsg = hidden_entry[:3]
                    nodes_hidden[node_name].append(
                        " [hidden] path: %s, status: %s" %
                        (hpath, _OsStatus(hstatus, hmsg)))
            else:
                nodes_bad[node_name] = "OS not found"

        # TODO: Shouldn't the global status be calculated by the LU?
        if nodes_valid and not nodes_bad:
            status = "valid"
        elif not nodes_valid and nodes_bad:
            status = "invalid"
            has_bad = True
        else:
            status = "partial valid"
            has_bad = True

        st_msg = "OS: %s [global status: %s]" % (os_name, status)
        if hid:
            st_msg += " [hidden]"
        if blk:
            st_msg += " [blacklisted]"
        ToStdout(st_msg)
        if os_variants:
            ToStdout(" Variants: [%s]" % utils.CommaJoin(os_variants))

        # Valid nodes are listed first, then the bad ones, each sorted
        for msg_map in (nodes_valid, nodes_bad):
            for node_name in utils.NiceSort(msg_map.keys()):
                ToStdout(" Node: %s, status: %s",
                         node_name, msg_map[node_name])
                for msg in nodes_hidden[node_name]:
                    ToStdout(msg)
        ToStdout("")

    return int(has_bad)