def _VerifyDisks(cl, uuid, nodes, instances):
  """Run a per-group "gnt-cluster verify-disks".

  """
  op = opcodes.OpGroupVerifyDisks(group_name=uuid,
                                  priority=constants.OP_PRIO_LOW)
  op.reason = [(constants.OPCODE_REASON_SRC_WATCHER,
                "Verifying disks of group %s" % uuid,
                utils.EpochNano())]
  job_id = cl.SubmitJob([op])

  ((_, offline_disk_instances, _), ) = \
    cli.PollJob(job_id, cl=cl, feedback_fn=logging.debug)

  try:
    cl.ArchiveJob(job_id)
  except Exception:  # pylint: disable=W0703
    logging.exception("Error while archiving job %s", job_id)

  if not offline_disk_instances:
    # nothing to do
    logging.debug("Verify-disks reported no offline disks, nothing to do")
    return

  logging.debug("Will activate disks for instance(s) %s",
                utils.CommaJoin(offline_disk_instances))

  # We submit only one job, and wait for it. Not optimal, but this puts less
  # load on the job queue.
  job = []
  for name in offline_disk_instances:
    try:
      inst = instances[name]
    except KeyError:
      logging.info("Can't find instance '%s', maybe it was ignored", name)
      continue

    if inst.status in HELPLESS_STATES or _CheckForOfflineNodes(nodes, inst):
      logging.info("Skipping instance '%s' because it is in a helpless state"
                   " or has offline secondaries", name)
      continue

    op = opcodes.OpInstanceActivateDisks(instance_name=name)
    op.reason = [(constants.OPCODE_REASON_SRC_WATCHER,
                  "Activating disks for instance %s" % name,
                  utils.EpochNano())]
    job.append(op)

  if job:
    job_id = cli.SendJob(job, cl=cl)

    try:
      cli.PollJob(job_id, cl=cl, feedback_fn=logging.debug)
    except Exception:  # pylint: disable=W0703
      logging.exception("Error while activating disks")
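# Illustrative sketch (not part of the source): how a watcher-style caller
# might drive _VerifyDisks once per node group. The helper name is
# hypothetical; the luxi client "cl" and the "nodes"/"instances" maps are
# assumed to be prepared by the caller, and QueryGroups is used here only
# to obtain the group UUIDs.
def _VerifyDisksAllGroups(cl, nodes, instances):
  for (uuid, ) in cl.QueryGroups([], ["uuid"], False):
    _VerifyDisks(cl, uuid, nodes, instances)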
def GET(self):
  """Return a list of all OSes.

  Can return error 500 in case of a problem.

  Example: ["debian-etch"]

  """
  cl = self.GetClient()
  op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[])
  cancel_fn = (lambda: _CheckIfConnectionDropped(self._req.request_sock))
  job_id = self.SubmitJob([op], cl=cl)
  # We use a custom feedback function; instead of printing, we log the status
  result = cli.PollJob(job_id, cl, feedback_fn=baserlib.FeedbackFn,
                       cancel_fn=cancel_fn)
  diagnose_data = result[0]

  if not isinstance(diagnose_data, list):
    raise http.HttpBadGateway(message="Can't get OS list")

  os_names = []
  for (name, variants) in diagnose_data:
    os_names.extend(cli.CalculateOSNames(name, variants))

  return os_names
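# Illustrative example (not part of the source; names made up): the shape of
# the data this handler processes. OpOsDiagnose yields one (name, variants)
# pair per OS, and cli.CalculateOSNames expands each pair into "name+variant"
# strings, or just "name" when the OS has no variants:
#
#   diagnose_data = [("debian-etch", []), ("ubuntu", ["lucid", "precise"])]
#   os_names == ["debian-etch", "ubuntu+lucid", "ubuntu+precise"]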
def _ExecOp(self, *ops):
  """Execute one or more opcodes and manage the exec buffer.

  @return: if only one opcode has been passed, we return its result;
      otherwise we return the list of results

  """
  job_id = cli.SendJob(ops, cl=self.cl)
  results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
  if len(ops) == 1:
    return results[0]
  else:
    return results
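# Illustrative usage (not part of the source; the opcodes below are only
# examples): _ExecOp unwraps the result list for a single opcode, so callers
# can write either form.
#
#   result = self._ExecOp(opcodes.OpClusterQuery())    # single result
#   (res_a, res_b) = self._ExecOp(op_a, op_b)          # list of two results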
def WaitJob(opts, args):
  """Wait for a job to finish, not producing any output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: Contains the job ID
  @rtype: int
  @return: the desired exit code

  """
  job_id = args[0]

  retcode = 0
  try:
    cli.PollJob(job_id, feedback_fn=lambda _: None)
  except errors.GenericError as err:
    (retcode, job_result) = cli.FormatError(err)
    ToStderr("Job %s failed: %s", job_id, job_result)

  return retcode
def WatchJob(opts, args):
  """Follow a job and print its output as it arrives.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: Contains the job ID
  @rtype: int
  @return: the desired exit code

  """
  job_id = args[0]

  msg = ("Output from job %s follows" % job_id)
  ToStdout(msg)
  ToStdout("-" * len(msg))

  retcode = 0
  try:
    cli.PollJob(job_id)
  except errors.GenericError as err:
    (retcode, job_result) = cli.FormatError(err)
    ToStderr("Job %s failed: %s", job_id, job_result)

  return retcode
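# Illustrative sketch (not part of the source): WaitJob and WatchJob share
# the same PollJob/FormatError pattern; a hypothetical helper like this one
# could factor it out.
def _PollJobForExitCode(job_id, feedback_fn=None):
  """Polls a job, mapping a GenericError to an exit code plus stderr message.

  """
  try:
    cli.PollJob(job_id, feedback_fn=feedback_fn)
  except errors.GenericError as err:
    (retcode, job_result) = cli.FormatError(err)
    ToStderr("Job %s failed: %s", job_id, job_result)
    return retcode
  return 0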
def TestJobqueue(opts, _):
  """Runs a few tests on the job queue.

  """
  _TestJobSubmission(opts)
  _TestJobDependency(opts)

  (TM_SUCCESS,
   TM_MULTISUCCESS,
   TM_FAIL,
   TM_PARTFAIL) = range(4)
  TM_ALL = compat.UniqueFrozenset([
    TM_SUCCESS,
    TM_MULTISUCCESS,
    TM_FAIL,
    TM_PARTFAIL,
    ])

  for mode in TM_ALL:
    test_messages = [
      "Testing mode %s" % mode,
      "Hello World",
      "A",
      "",
      "B",
      "Foo|bar|baz",
      utils.TimestampForFilename(),
      ]

    fail = mode in (TM_FAIL, TM_PARTFAIL)

    if mode == TM_PARTFAIL:
      ToStdout("Testing partial job failure")
      ops = [
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=False),
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=False),
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=True),
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=False),
        ]
      expect_messages = 3 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_ERROR,
        constants.OP_STATUS_ERROR,
        ]
      expect_resultlen = 2
    elif mode == TM_MULTISUCCESS:
      ToStdout("Testing multiple successful opcodes")
      ops = [
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=False),
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=False),
        ]
      expect_messages = 2 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        ]
      expect_resultlen = 2
    else:
      if mode == TM_SUCCESS:
        ToStdout("Testing job success")
        expect_opstatus = [constants.OP_STATUS_SUCCESS]
      elif mode == TM_FAIL:
        ToStdout("Testing job failure")
        expect_opstatus = [constants.OP_STATUS_ERROR]
      else:
        raise errors.ProgrammerError("Unknown test mode %s" % mode)

      ops = [
        opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True,
                             log_messages=test_messages, fail=fail),
        ]
      expect_messages = [test_messages]
      expect_resultlen = 1

    cl = cli.GetClient()
    cli.SetGenericOpcodeOpts(ops, opts)

    # Send job to master daemon
    job_id = cli.SendJob(ops, cl=cl)

    reporter = _JobQueueTestReporter()
    results = None

    try:
      results = cli.PollJob(job_id, cl=cl, reporter=reporter)
    except errors.OpExecError as err:
      if not fail:
        raise
      ToStdout("Ignoring error for 'job fail' test: %s", err)
    else:
      if fail:
        raise errors.OpExecError("Job didn't fail when it should")

    # Check length of result
    if fail:
      if results is not None:
        raise errors.OpExecError("Received result from failed job")
    elif len(results) != expect_resultlen:
      raise errors.OpExecError("Received %s results (%s), expected %s" %
                               (len(results), results, expect_resultlen))

    # Check received log messages
    all_messages = [i for j in expect_messages for i in j]
    if reporter.GetTestMessages() != all_messages:
      raise errors.OpExecError("Received test messages don't match input"
                               " (input %r, received %r)" %
                               (all_messages, reporter.GetTestMessages()))

    # Check final status
    reported_job_id = reporter.GetJobId()
    if reported_job_id != job_id:
      raise errors.OpExecError("Reported job ID %s doesn't match"
                               " submission job ID %s" %
                               (reported_job_id, job_id))

    jobdetails = cli.GetClient().QueryJobs([job_id], ["status", "opstatus"])[0]
    if not jobdetails:
      raise errors.OpExecError("Can't find job %s" % job_id)

    if fail:
      exp_status = constants.JOB_STATUS_ERROR
    else:
      exp_status = constants.JOB_STATUS_SUCCESS

    (final_status, final_opstatus) = jobdetails
    if final_status != exp_status:
      raise errors.OpExecError("Final job status is %s, not %s as expected" %
                               (final_status, exp_status))
    if len(final_opstatus) != len(ops):
      raise errors.OpExecError("Did not receive status for all opcodes"
                               " (got %s, expected %s)" %
                               (len(final_opstatus), len(ops)))
    if final_opstatus != expect_opstatus:
      raise errors.OpExecError("Opcode status is %s, expected %s" %
                               (final_opstatus, expect_opstatus))
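# Note (assumption, not from the source): _JobQueueTestReporter is defined
# elsewhere in this module. From its use above it must at least record the
# job ID being polled and collect the test log messages; a minimal sketch of
# that interface, with hypothetical internals, extending the standard
# cli.StdioJobPollReportCb polling callback:
class _JobQueueTestReporterSketch(cli.StdioJobPollReportCb):
  def __init__(self):
    cli.StdioJobPollReportCb.__init__(self)
    self._job_id = None    # remembered when polling starts
    self._testmsgs = []    # accumulated test log messages

  def GetJobId(self):
    return self._job_id

  def GetTestMessages(self):
    return self._testmsgs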
def PowerNode(opts, args):
  """Change/ask power state of a node.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the power subcommand, followed by the names of the nodes
      to operate on
  @rtype: int
  @return: the desired exit code

  """
  command = args.pop(0)

  if opts.no_headers:
    headers = None
  else:
    headers = {"node": "Node", "status": "Status"}

  if command not in _LIST_POWER_COMMANDS:
    ToStderr("power subcommand %s not supported." % command)
    return constants.EXIT_FAILURE

  oob_command = "power-%s" % command

  if oob_command in _OOB_COMMAND_ASK:
    if not args:
      ToStderr("Please provide at least one node for this command")
      return constants.EXIT_FAILURE
    elif not opts.force and not ConfirmOperation(args, "nodes",
                                                 "power %s" % command):
      return constants.EXIT_FAILURE
    assert len(args) > 0

  opcodelist = []
  if not opts.ignore_status and oob_command == constants.OOB_POWER_OFF:
    # TODO: This is a little ugly as we can't catch and revert
    for node in args:
      opcodelist.append(opcodes.OpNodeSetParams(node_name=node, offline=True,
                                                auto_promote=opts.auto_promote))

  opcodelist.append(opcodes.OpOobCommand(node_names=args,
                                         command=oob_command,
                                         ignore_status=opts.ignore_status,
                                         timeout=opts.oob_timeout,
                                         power_delay=opts.power_delay))

  cli.SetGenericOpcodeOpts(opcodelist, opts)

  job_id = cli.SendJob(opcodelist)

  # We just want the OOB opcode status; if it fails, PollJob gives us the
  # error message in it
  result = cli.PollJob(job_id)[-1]

  errs = 0
  data = []
  for node_result in result:
    (node_tuple, data_tuple) = node_result
    (_, node_name) = node_tuple
    (data_status, data_node) = data_tuple
    if data_status == constants.RS_NORMAL:
      if oob_command == constants.OOB_POWER_STATUS:
        if data_node[constants.OOB_POWER_STATUS_POWERED]:
          text = "powered"
        else:
          text = "unpowered"
        data.append([node_name, text])
      else:
        # We don't expect data here, so we just say it was successfully invoked
        data.append([node_name, "invoked"])
    else:
      errs += 1
      data.append([node_name, cli.FormatResultError(data_status, True)])

  data = GenerateTable(separator=opts.separator, headers=headers,
                       fields=["node", "status"], data=data)

  for line in data:
    ToStdout(line)

  if errs:
    return constants.EXIT_FAILURE
  else:
    return constants.EXIT_SUCCESS
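# Illustrative example (not part of the source; values made up): the
# per-node result shape the loop above unpacks, here for a "power status"
# call. Each pair leads with a result status; only RS_NORMAL payloads are
# used, anything else is rendered via cli.FormatResultError.
#
#   result = [
#     ((constants.RS_NORMAL, "node1.example.com"),
#      (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})),
#     ((constants.RS_NORMAL, "node2.example.com"),
#      (constants.RS_UNAVAIL, None)),
#     ]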