def TestLiveRepair():
    """Test that a live-repair diagnosis runs the repair command.

    A diagnose command reporting the C{live-repair} status with the
    C{repair} command is uploaded to a non-master node, together with a
    repair script. The script stores the C{details} value it reads from
    stdin (a random number chosen here) in C{/tmp/<details>}; the test
    passes if that file holds the number afterwards.

    The acquired node is released even if an upload or an assertion
    fails, so that a failure does not leave it marked as in use.

    @raise qa_error.Error: if the file written by the repair script does
        not contain the expected number

    """
    diagnose_path = '/etc/ganeti/node-diagnose-commands/live-repair'
    repair_path = '/etc/ganeti/node-repair-commands/repair'
    # Executed on the node; it is Python 2 code and must stay that way
    # as long as the shebang points to a Python 2 interpreter.
    repair_script = """#!/usr/bin/python
import sys
import json

n = json.loads(sys.stdin.read())['details']
with open('/tmp/' + n, 'w') as f:
  f.write(n)
print 'file written'
"""

    _SetUp('live-repair')
    n = random.randint(10000, 99999)
    node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
        # The diagnose command simply echoes a fixed JSON document
        UploadData(
            node.primary,
            'echo \'' + serializer.DumpJson({
                "status": "live-repair",
                "command": "repair",
                "details": str(n)
            }).strip() + '\'',
            0o755,
            diagnose_path)
        UploadData(node.primary, repair_script, 0o755, repair_path)
        _AssertRepairCommand()
        tag = _AssertRepairTagAddition(node)
        # Element 1 of AssertCommand's result is the command's stdout
        if str(n) != AssertCommand(["cat", "/tmp/" + str(n)], node=node)[1]:
            raise qa_error.Error('Repair command was unsuccessful')
    finally:
        node.Release()
    _TearDown(node, tag, [diagnose_path, repair_path], False)
def main():
    """Main program.

    Parses the command line, refuses to run without C{--yes-do-it}, loads
    the QA configuration and runs the QA suite. The SSH multiplexers are
    closed once the run ends, whether it succeeded or not.

    """
    colors.check_for_colors()

    parser = optparse.OptionParser(usage="%prog [options] <config-file>")
    parser.add_option("--yes-do-it", dest="yes_do_it", action="store_true",
                      help="Really execute the tests")
    opts, args = parser.parse_args()

    if len(args) != 1:
        # parser.error() prints the message and terminates the program
        parser.error("Wrong number of arguments.")
    config_file = args[0]

    if not opts.yes_do_it:
        warning = ("Executing this script irreversibly destroys any Ganeti\n"
                   "configuration on all nodes involved. If you really want\n"
                   "to start testing, supply the --yes-do-it option.")
        print(warning)
        sys.exit(1)

    qa_config.Load(config_file)

    primary = qa_config.GetMasterNode().primary
    qa_utils.StartMultiplexer(primary)

    primary_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, ""))
    print("SSH command for primary node: %s" % primary_ssh)
    other_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
    print("SSH command for other nodes: %s" % other_ssh)

    try:
        RunQa()
    finally:
        qa_utils.CloseMultiplexers()
def TestClusterMasterFailoverWithDrainedQueue():
    """gnt-cluster master-failover with drained queue

    Creates the queue drain file on the node that is about to become
    master, fails over to it, checks that neither node has a drain file
    afterwards, and finally fails back to the original master.

    """
    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    def _assert_queue_not_drained():
        for candidate in (master, failovermaster):
            _AssertDrainFile(candidate, fail=True)

    _assert_queue_not_drained()

    # Drain queue on failover master
    drain_file = _NodeQueueDrainFile(failovermaster)
    AssertCommand(["touch", drain_file], node=failovermaster)

    failover_cmd = ["gnt-cluster", "master-failover"]
    try:
        _AssertDrainFile(failovermaster)
        AssertCommand(failover_cmd, node=failovermaster)
        _AssertDrainFile(master, fail=True)
        _AssertDrainFile(failovermaster, fail=True)

        # Back to original master node
        AssertCommand(failover_cmd, node=master)
    finally:
        failovermaster.Release()

    _assert_queue_not_drained()
def AssertClusterVerify(fail=False, errors=None, warnings=None,
                        no_warnings=None):
    """Run cluster-verify and check the result, ignoring warnings by default.

    Without any expectation about error codes, the plain command is run
    and only its exit status is checked. Otherwise it is run with
    C{--error-codes} and the reported codes are compared against the
    expectations.

    @type fail: bool
    @param fail: if cluster-verify is expected to fail instead of succeeding.
    @type errors: list of tuples
    @param errors: List of CV_XXX errors that are expected; if specified, all
        the errors listed must appear in cluster-verify output. A non-empty
        value implies C{fail=True}.
    @type warnings: list of tuples
    @param warnings: List of CV_XXX warnings that are expected to be raised;
        if specified, all the warnings listed must appear in cluster-verify
        output.
    @type no_warnings: list of tuples
    @param no_warnings: List of CV_XXX warnings that we expect NOT to be
        raised.

    """
    cvcmd = "gnt-cluster verify"
    mnode = qa_config.GetMasterNode()

    if not (errors or warnings or no_warnings):
        # Nothing to compare, only the exit status matters
        AssertCommand(cvcmd, fail=fail, node=mnode)
        return

    cvout = GetCommandOutput(mnode.primary, cvcmd + " --error-codes",
                             fail=(fail or errors))
    print(cvout)
    act_errs, act_warns = _GetCVErrorCodes(cvout)
    if errors:
        _CheckVerifyErrors(act_errs, errors, "error")
    if warnings:
        _CheckVerifyErrors(act_warns, warnings, "warning")
    if no_warnings:
        _CheckVerifyNoWarnings(act_warns, no_warnings)
def _CreateRapiUser(rapi_user):
    """RAPI credentials creation, with the secret auto-generated.

    Writes a RAPI users file granting C{rapi_user} write access, installs
    it on the master node and restarts the ganeti service.

    @param rapi_user: name of the RAPI user to create
    @return: the generated secret

    """
    rapi_secret = utils.GenerateSecret()

    master = qa_config.GetMasterNode()
    users_path = qa_utils.MakeNodePath(master, pathutils.RAPI_USERS_FILE)
    users_dir = os.path.dirname(users_path)

    with tempfile.NamedTemporaryFile() as local_file:
        local_file.write("%s %s write\n" % (rapi_user, rapi_secret))
        local_file.flush()

        uploaded = qa_utils.UploadFile(master.primary, local_file.name)
        try:
            AssertCommand(["mkdir", "-p", users_dir])
            AssertCommand(["mv", uploaded, users_path])
        finally:
            # After a successful "mv" there is nothing left to remove;
            # "-f" keeps this from failing in that case
            AssertCommand(["rm", "-f", uploaded])

    # The certificates have to be reloaded now
    AssertCommand(["service", "ganeti", "restart"])

    return rapi_secret
def TestNodeAddAll():
    """Adding all nodes to cluster.

    Once every non-master node has been added, the process list of each
    node is checked for the daemons expected to run there.

    @raise qa_error.Error: naming the first expected daemon that does not
        show up in a node's process list

    """
    master = qa_config.GetMasterNode()
    for node in qa_config.get("nodes"):
        if node != master:
            NodeAdd(node, readd=False)

    master_only_daemons = [
        'ganeti-wconfd',
        'ganeti-rapi',
        'ganeti-luxid',
        'ganeti-maintd',
    ]

    for node in qa_config.get("nodes"):
        ps_cmd = utils.ShellQuoteArgs(["ps", "-Ao", "comm"])
        # stdout of "ps"; daemons are looked up as substrings of it
        process_list = AssertCommand(ps_cmd, node=node)[1]

        expected = ['ganeti-noded']
        if constants.ENABLE_MOND:
            expected.append('ganeti-mond')
        if node == master:
            expected.extend(master_only_daemons)

        missing = [name for name in expected if name not in process_list]
        if missing:
            # Only the first missing daemon is reported
            raise qa_error.Error(
                missing[0] + ' is not running at %s' % node.primary)
def TestInterClusterInstanceMove(src_instance, dest_instance, inodes, tnode,
                                 perform_checks=True):
    """Test tools/move-instance

    The instance is moved to C{tnode} under the destination name and then
    moved back under its original name; the master node is passed as both
    source and destination cluster.

    @param src_instance: the instance to move
    @param dest_instance: instance object providing the name used after
        the first move
    @param inodes: current nodes of the instance, primary first
    @param tnode: node acting as primary after the first move
    @param perform_checks: whether to run the command-line based instance
        checks around each move

    """
    master = qa_config.GetMasterNode()

    password_file = tempfile.NamedTemporaryFile()
    password_file.write(_rapi_password)
    password_file.flush()

    # Needed only if checks are to be performed
    if perform_checks:
        dest_instance.SetDiskTemplate(src_instance.disk_template)

    # TODO: Run some instance tests before moving back

    if len(inodes) > 1:
        # No disk template currently requires more than 1 secondary node.
        # If this changes, either this test must be skipped or the script
        # must be updated.
        assert len(inodes) == 2
        snode = inodes[1]
    else:
        # instance is not redundant, but we still need to pass a node
        # (which will be ignored)
        snode = tnode
    pnode = inodes[0]

    # note: pnode:snode are the *current* nodes, so we move it first to
    # tnode:pnode, then back to pnode:snode
    moves = [
        (src_instance.name, dest_instance.name,
         tnode.primary, pnode.primary),
        (dest_instance.name, src_instance.name,
         pnode.primary, snode.primary),
    ]

    for from_name, to_name, new_primary, new_secondary in moves:
        cmd = [
            "../tools/move-instance",
            "--verbose",
            "--src-ca-file=%s" % _rapi_ca.name,
            "--src-username=%s" % _rapi_username,
            "--src-password-file=%s" % password_file.name,
            "--dest-instance-name=%s" % to_name,
            "--dest-primary-node=%s" % new_primary,
            "--dest-secondary-node=%s" % new_secondary,
            "--net=0:mac=%s" % constants.VALUE_GENERATE,
            master.primary,
            master.primary,
            from_name,
        ]

        # Some uses of this test might require that RAPI-only commands are
        # used, and the checks are command-line based.
        if perform_checks:
            qa_utils.RunInstanceCheck(to_name, False)

        AssertEqual(StartLocalCommand(cmd).wait(), 0)

        if perform_checks:
            qa_utils.RunInstanceCheck(from_name, False)
            qa_utils.RunInstanceCheck(to_name, True)
def TestClusterBurnin():
    """Burnin

    Uploads the burnin tool to the master node and runs it against as many
    instances as could be acquired, with flags taken from the C{options}
    section of the QA configuration.

    @raise qa_error.Error: if not even one instance could be acquired

    """
    master = qa_config.GetMasterNode()

    options = qa_config.get("options", {})
    disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
    parallel = options.get("burnin-in-parallel", False)
    check_inst = options.get("burnin-check-instances", False)
    do_rename = options.get("burnin-rename", "")
    do_reboot = options.get("burnin-reboot", True)
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

    # Get as many instances as we need
    instances = []
    try:
        try:
            wanted = qa_config.get("options", {}).get("burnin-instances", 1)
            for _ in range(wanted):
                instances.append(qa_config.AcquireInstance())
        except qa_error.OutOfInstancesError:
            print("Not enough instances, continuing anyway.")

        if not instances:
            raise qa_error.Error("Burnin needs at least one instance")

        script = qa_utils.UploadFile(master.primary, "../tools/burnin")
        try:
            disks = qa_config.GetDiskOptions()
            disk_sizes = ",".join([disk.get("size") for disk in disks])
            disk_growth = ",".join([disk.get("growth") for disk in disks])

            # Run burnin
            cmd = [
                "env",
                "PYTHONPATH=%s" % _constants.VERSIONEDSHAREDIR,
                script,
                "--os=%s" % qa_config.get("os"),
                "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
                "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
                "--disk-size=%s" % disk_sizes,
                "--disk-growth=%s" % disk_growth,
                "--disk-template=%s" % disk_template,
            ]
            if parallel:
                cmd.extend(["--parallel", "--early-release"])
            if check_inst:
                cmd.append("--http-check")
            if do_rename:
                cmd.append("--rename=%s" % do_rename)
            if do_reboot:
                cmd.append("--reboot-types=%s" % ",".join(reboot_types))
            else:
                cmd.append("--no-reboot")
            cmd.extend(inst.name for inst in instances)
            AssertCommand(cmd)
        finally:
            AssertCommand(["rm", "-f", script])

    finally:
        for inst in instances:
            inst.Release()
def TestEmptyCluster():
    """Testing remote API on an empty cluster.

    Queries the basic RAPI resources and verifies their content, then
    makes sure that a non-existent resource yields HTTP 404 for every
    method.

    @raise qa_error.Error: if a request for a non-existent resource does
        not fail

    """
    master = qa_config.GetMasterNode()
    master_full = qa_utils.ResolveNodeName(master)

    def _VerifyInfo(data):
        AssertIn("name", data)
        AssertIn("master", data)
        AssertEqual(data["master"], master_full)

    def _VerifyNodes(data):
        expected_entry = {
            "id": master_full,
            "uri": "/2/nodes/%s" % master_full,
        }
        AssertIn(expected_entry, data)

    def _VerifyNodesBulk(data):
        for node_data in data:
            for field in NODE_FIELDS:
                AssertIn(field, node_data)

    def _VerifyGroups(data):
        group_name = constants.INITIAL_NODE_GROUP_NAME
        expected_entry = {
            "name": group_name,
            "uri": "/2/groups/" + group_name,
        }
        AssertIn(expected_entry, data)

    def _VerifyGroupsBulk(data):
        for group_data in data:
            for field in GROUP_FIELDS:
                AssertIn(field, group_data)

    queries = [
        ("/", None, "GET", None),
        ("/2/info", _VerifyInfo, "GET", None),
        ("/2/tags", None, "GET", None),
        ("/2/nodes", _VerifyNodes, "GET", None),
        ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
        ("/2/groups", _VerifyGroups, "GET", None),
        ("/2/groups?bulk=1", _VerifyGroupsBulk, "GET", None),
        ("/2/instances", [], "GET", None),
        ("/2/instances?bulk=1", [], "GET", None),
        ("/2/os", None, "GET", None),
    ]
    _DoTests(queries)

    # Test HTTP Not Found
    for method in ("GET", "PUT", "POST", "DELETE"):
        try:
            _DoTests([("/99/resource/not/here/99", None, method, None)])
        except rapi.client.GanetiApiError as err:
            AssertEqual(err.code, 404)
        else:
            raise qa_error.Error(
                "Non-existent resource didn't return HTTP 404")
def RunCustomSshPortTests():
    """Test accessing nodes with custom SSH ports.

    This requires removing nodes, adding them to a new group, and then
    undoing the change.

    The test does nothing unless C{group-custom-ssh-port} is enabled in
    the QA configuration, and it is skipped if the acquired node is
    likely to be reached over IPv6 on the standard SSH port.

    """
    if not qa_config.TestEnabled("group-custom-ssh-port"):
        return

    std_port = netutils.GetDaemonPort(constants.SSH)
    # Custom SSH port configured for the temporary group
    port = 211
    master = qa_config.GetMasterNode()
    with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
        # Checks if the node(s) could be contacted through IPv6.
        # If yes, better skip the whole test.
        for node in nodes:
            if qa_utils.UsesIPv6Connection(node.primary, std_port):
                print(
                    "Node %s is likely to be reached using IPv6,"
                    "skipping the test" % (node.primary, ))
                return

        # Take the nodes out of the cluster so that they can be re-added
        # as members of the new group
        for node in nodes:
            qa_node.NodeRemove(node)

        # NOTE(review): the nesting of the statements below was
        # reconstructed from their order; confirm which of them belong
        # inside the two context managers.
        with qa_iptables.RulesContext() as r:
            with qa_group.NewGroupCtx() as group:
                qa_group.ModifyGroupSshPort(r, group, nodes, port)

                for node in nodes:
                    qa_node.NodeAdd(node, group=group)

                # Make sure that the cluster doesn't have any pre-existing
                # problem
                qa_cluster.AssertClusterVerify()

                # Create and allocate instances
                instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
                try:
                    instance2 = \
                        qa_instance.TestInstanceAddWithPlainDisk(nodes)
                    try:
                        # cluster-verify checks that disks are allocated
                        # correctly
                        qa_cluster.AssertClusterVerify()

                        # Remove instances
                        qa_instance.TestInstanceRemove(instance2)
                        qa_instance.TestInstanceRemove(instance1)
                    finally:
                        instance2.Release()
                finally:
                    instance1.Release()

                # Remove the nodes again while the temporary group still
                # exists
                for node in nodes:
                    qa_node.NodeRemove(node)

        # Undo the change: add the nodes back without a custom group
        for node in nodes:
            qa_node.NodeAdd(node)

        qa_cluster.AssertClusterVerify()
def AssertCommand(cmd, fail=False, node=None, log_cmd=True,
                  forward_agent=True, max_seconds=None):
    """Checks that a remote command succeeds.

    @param cmd: either a string (the command to execute) or a list (to be
        converted using L{utils.ShellQuoteArgs} into a string)
    @type fail: boolean or None
    @param fail: if the command is expected to fail instead of succeeding,
        or None if we don't care
    @param node: if passed, it should be the node on which the command
        should be executed, instead of the master node (can be either a
        dict or a string)
    @param log_cmd: if False, the command won't be logged (simply passed to
        StartSSH)
    @type forward_agent: boolean
    @param forward_agent: whether to forward the agent when starting the SSH
        session or not, sometimes useful for crypto-related operations which
        can use a key they should not
    @type max_seconds: double
    @param max_seconds: fail if the command takes more than C{max_seconds}
        seconds
    @return: the return code, stdout and stderr of the command
    @raise qa_error.Error: if the command fails when it shouldn't or vice
        versa

    """
    if node is None:
        node = qa_config.GetMasterNode()
    nodename = _GetName(node, operator.attrgetter("primary"))

    cmdstr = cmd if isinstance(cmd, basestring) else utils.ShellQuoteArgs(cmd)

    # Run the command, keeping track of how long it takes
    started_at = datetime.datetime.now()
    process = StartSSH(nodename, cmdstr, log_cmd=log_cmd,
                       forward_agent=forward_agent)
    stdout, stderr = process.communicate()
    rcode = process.returncode
    elapsed = TimedeltaToTotalSeconds(datetime.datetime.now() - started_at)

    try:
        if fail is not None:
            _AssertRetCode(rcode, fail, cmdstr, nodename)
    finally:
        # The output is shown even when the return code check fails
        if log_cmd:
            _PrintCommandOutput(stdout, stderr)

    if max_seconds is not None and elapsed > max_seconds:
        raise qa_error.Error(
            "Cmd '%s' took %f seconds, maximum of %f was exceeded" %
            (cmdstr, elapsed, max_seconds))

    return rcode, stdout, stderr
def _ResetWatcherDaemon():
    """Removes the watcher daemon's state file.

    The file name pattern is expanded by the shell on the node, which is
    why the removal goes through C{bash -c}.

    """
    master = qa_config.GetMasterNode()
    state_file_glob = pathutils.WATCHER_GROUP_STATE_FILE % "*-*-*-*"
    path = qa_utils.MakeNodePath(master, state_file_glob)

    AssertCommand(["bash", "-c", "rm -vf %s" % path])
def _AssertOobCall(verify_path, expected_args):
    """Assert the OOB call was performed with the expected arguments.

    @param verify_path: file on the master node whose content is compared
    @param expected_args: content expected in the file, ignoring leading
        and trailing whitespace

    """
    master = qa_config.GetMasterNode()

    cat_cmd = utils.ShellQuoteArgs(["cat", verify_path])
    recorded = qa_utils.GetCommandOutput(master.primary, cat_cmd, tty=False)

    AssertEqual(expected_args, recorded.strip())
def TestNodeModify(node):
    """gnt-node modify

    Toggles the master-candidate, drained and offline flags of the node
    and re-applies its secondary IP address, verifying the cluster along
    the way. The candidate pool size is lowered for the duration of the
    test and set to the default afterwards.

    @param node: the node to modify

    """
    default_pool_size = 10
    nodes = qa_config.GetAllNodes()
    test_pool_size = len(nodes) - 1

    def _set_pool_size(size):
        AssertCommand(
            ["gnt-cluster", "modify", "--candidate-pool-size=%s" % size])

    def _demote_with_auto_promote(name):
        AssertCommand([
            "gnt-node", "modify", "--master-candidate=no", "--auto-promote",
            name
        ])

    # Reduce the number of master candidates, because otherwise all
    # subsequent 'gnt-cluster verify' commands fail due to not enough
    # master candidates.
    _set_pool_size(test_pool_size)

    # make sure enough master candidates will be available by disabling the
    # master candidate role first with --auto-promote
    _demote_with_auto_promote(node.primary)

    # now it's safe to force-remove the master candidate role
    for flag in ("master-candidate", "drained", "offline"):
        for value in ("yes", "no"):
            flag_option = "--%s=%s" % (flag, value)
            AssertCommand(
                ["gnt-node", "modify", "--force", flag_option, node.primary])
            AssertCommand(["gnt-cluster", "verify"])

    AssertCommand(
        ["gnt-node", "modify", "--master-candidate=yes", node.primary])

    # Test setting secondary IP address
    secondary_option = "--secondary-ip=%s" % node.secondary
    AssertCommand(["gnt-node", "modify", secondary_option, node.primary])

    AssertRedirectedCommand(["gnt-cluster", "verify"])
    _set_pool_size(default_pool_size)

    # For test clusters with more nodes than the default pool size,
    # we now have too many master candidates. To readjust to the original
    # size, manually demote all nodes and rely on auto-promotion to adjust.
    if len(nodes) > default_pool_size:
        master = qa_config.GetMasterNode()
        for other in nodes:
            if other.primary != master.primary:
                _demote_with_auto_promote(other.primary)
def MarkNodeAddedAll():
    """Mark all nodes as added.

    This is useful if we don't create the cluster ourselves (in qa).
    The master node is left alone.

    """
    master = qa_config.GetMasterNode()
    non_master_nodes = [
        node for node in qa_config.get("nodes") if node != master
    ]
    for node in non_master_nodes:
        node.MarkAdded()
def _TestGroupModifyISpecs(groupname):
    """Test modifying the instance specs of a node group's ipolicy.

    The test first sets identical min and max values for all parameters,
    then for each parameter checks that a consistent min/max pair is
    accepted and that a min above the max is rejected. Afterwards the
    bounds are reset to "default" and the resulting ipolicy is compared
    with the one found at the start. Finally, the options printed by
    C{gnt-group show-ispecs-cmd} are applied through C{gnt-group modify}
    and must not change what C{show-ispecs-cmd} reports.

    @param groupname: name of the node group under test

    """
    # This test is built on the assumption that the default ipolicy holds
    # for the node group under test
    old_values = _GetGroupIPolicy(groupname)
    # Same value for every parameter, used as both lower and upper bound
    samevals = dict((p, 4) for p in constants.ISPECS_PARAMETERS)
    base_specs = {
        constants.ISPECS_MINMAX: [{
            constants.ISPECS_MIN: samevals,
            constants.ISPECS_MAX: samevals,
        }],
    }
    mod_values = _TestGroupSetISpecs(groupname, new_specs=base_specs,
                                     old_values=old_values)
    for par in constants.ISPECS_PARAMETERS:
        # First make sure that the test works with good values
        good_specs = {
            constants.ISPECS_MINMAX: [{
                constants.ISPECS_MIN: {par: 8},
                constants.ISPECS_MAX: {par: 8},
            }],
        }
        mod_values = _TestGroupSetISpecs(groupname, diff_specs=good_specs,
                                         old_values=mod_values)
        # A minimum above the maximum has to be rejected
        bad_specs = {
            constants.ISPECS_MINMAX: [{
                constants.ISPECS_MIN: {par: 8},
                constants.ISPECS_MAX: {par: 4},
            }],
        }
        _TestGroupSetISpecs(groupname, diff_specs=bad_specs, fail=True,
                            old_values=mod_values)
    # Go back to the default bounds and check that nothing else changed
    AssertCommand(["gnt-group", "modify", "--ipolicy-bounds-specs", "default",
                   groupname])
    AssertEqual(_GetGroupIPolicy(groupname), old_values)

    # Get the ipolicy command (from the cluster config)
    mnode = qa_config.GetMasterNode()
    addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([
        "gnt-group", "show-ispecs-cmd", "--include-defaults", groupname,
    ]))
    modcmd = ["gnt-group", "modify"]
    opts = addcmd.split()
    # The printed command is a "gnt-group add" invocation
    assert opts[0:2] == ["gnt-group", "add"]
    # Copy every "--ipolicy-*" option together with the token following it
    for k in range(2, len(opts) - 1):
        if opts[k].startswith("--ipolicy-"):
            assert k + 2 <= len(opts)
            modcmd.extend(opts[k:k + 2])
    modcmd.append(groupname)
    # Apply the ipolicy to the group and verify the result
    AssertCommand(modcmd)
    new_addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([
        "gnt-group", "show-ispecs-cmd", groupname,
    ]))
    AssertEqual(addcmd, new_addcmd)
def _GetJobStatuses():
    """Invokes gnt-job list and extracts an id to status dictionary.

    Every output line is split on whitespace and is expected to consist
    of exactly two fields, the job id and its status.

    @rtype: dict of string to string
    @return: A dictionary mapping job ids to matching statuses

    """
    master = qa_config.GetMasterNode()
    list_output = GetCommandOutput(
        master.primary, "gnt-job list --no-headers --output=id,status")
    return dict(line.split() for line in list_output.splitlines())
def TestPauseWatcher():
    """Tests and pauses the watcher.

    The watcher is paused for four hours; the watcher info output must
    report it as paused afterwards.

    """
    master = qa_config.GetMasterNode()

    AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])

    info_cmd = utils.ShellQuoteArgs(["gnt-cluster", "watcher", "info"])
    info_output = GetCommandOutput(master.primary, info_cmd)
    AssertMatch(info_output, r"^.*\bis paused\b.*")
def TestJobCancellation():
    """gnt-job cancel

    Two delay jobs are submitted and the second one is cancelled. If it is
    still being cancelled when checked, its status is polled until it
    changes or a timeout is hit.

    @raise qa_error.Error: if the job id cannot be found in the command
        output, or if the job does not end up cancelled

    """
    # The delay used for the first command should be large enough for the
    # next command and the cancellation command to complete before the
    # first job is done. The second delay should be small enough that not
    # too much time is spent waiting in the case of a failed cancel and a
    # running command.
    FIRST_COMMAND_DELAY = 10.0
    SECOND_COMMAND_DELAY = 3.0
    # The multiplier to use is arbitrary, setting it higher could prevent
    # flakiness
    WAIT_MULTIPLIER = 4.0

    AssertCommand(
        ["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)])

    master = qa_config.GetMasterNode()

    # Forcing tty usage does not work on buildbot, so force all output of
    # this command to be redirected to stdout
    submit_cmd = "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY
    job_id_output = GetCommandOutput(master.primary, submit_cmd)

    possible_job_ids = re.findall("JobID: ([0-9]+)", job_id_output)
    if len(possible_job_ids) != 1:
        raise qa_error.Error(
            "Cannot parse gnt-debug delay output to find job id")
    job_id = possible_job_ids[0]

    AssertCommand(["gnt-job", "cancel", job_id])

    # Now wait until the second job finishes, and expect the watch to fail
    # due to job cancellation
    AssertCommand(["gnt-job", "watch", job_id], fail=True)

    # Then check for job cancellation
    job_status = _GetJobStatus(job_id)
    if job_status == constants.JOB_STATUS_CANCELED:
        return

    # Try and see if the job is being cancelled, and wait until the status
    # changes or we hit a timeout
    if job_status == constants.JOB_STATUS_CANCELING:
        retry_fn = functools.partial(_RetryingFetchJobStatus,
                                     constants.JOB_STATUS_CANCELING, job_id)
        try:
            job_status = retry.Retry(retry_fn, 2.0,
                                     WAIT_MULTIPLIER * FIRST_COMMAND_DELAY)
        except retry.RetryTimeout:
            # The job status remains the same
            pass

    if job_status != constants.JOB_STATUS_CANCELED:
        raise qa_error.Error("Job was not successfully cancelled, status "
                             "found: %s" % job_status)
def ReloadCertificates(ensure_presence=True):
    """Reloads the client RAPI certificate with the one present on the node.

    If the QA is set up to use a specific certificate using the
    "rapi-files-location" parameter, it will be put in place prior to
    retrieving it.

    The certificate is stored in a temporary file kept in the module-level
    C{_rapi_ca}, and, except on virtual clusters, a new RAPI client using
    it is stored in C{_rapi_client}.

    @param ensure_presence: whether to call C{_EnsureRapiFilesPresence}
        before the certificate is fetched
    @raise qa_error.Error: if the RAPI username or password is not set

    """
    # pylint: disable=W0603
    # due to global usage
    global _rapi_ca
    global _rapi_client

    if ensure_presence:
        _EnsureRapiFilesPresence()

    have_credentials = (_rapi_username is not None and
                        _rapi_password is not None)
    if not have_credentials:
        raise qa_error.Error("RAPI username and password have to be set before"
                             " attempting to reload a certificate.")

    master = qa_config.GetMasterNode()

    # Load RAPI certificate from master node
    cert_path = qa_utils.MakeNodePath(master, pathutils.RAPI_CERT_FILE)
    cmd = ["openssl", "x509", "-in", cert_path]

    # Write to temporary file
    _rapi_ca = tempfile.NamedTemporaryFile()
    certificate = qa_utils.GetCommandOutput(master.primary,
                                            utils.ShellQuoteArgs(cmd))
    _rapi_ca.write(certificate)
    _rapi_ca.flush()

    port = qa_config.get("rapi-port", default=constants.DEFAULT_RAPI_PORT)
    cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name, proxy="")

    if qa_config.UseVirtualCluster():
        # TODO: Implement full support for RAPI on virtual clusters
        print(qa_logging.FormatWarning(
            "RAPI tests are not yet supported on"
            " virtual clusters and will be disabled"))
        assert _rapi_client is None
        return

    _rapi_client = rapi.client.GanetiRapiClient(master.primary, port=port,
                                                username=_rapi_username,
                                                password=_rapi_password,
                                                curl_config_fn=cfg_curl)
    print("RAPI protocol version: %s" % _rapi_client.GetVersion())
def GetObjectInfo(infocmd):
    """Get and parse information about a Ganeti object.

    The command is run on the master node and its output is parsed as
    YAML.

    @type infocmd: list of strings
    @param infocmd: command to be executed, e.g. ["gnt-cluster", "info"]
    @return: the information parsed, appropriately stored in dictionaries,
        lists...

    """
    master = qa_config.GetMasterNode()
    raw_info = GetCommandOutput(master.primary,
                                utils.ShellQuoteArgs(infocmd))
    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary objects and is rejected by recent PyYAML releases; the
    # input here is the output of a cluster command, but consider
    # yaml.safe_load() if plain data is all that is expected.
    return yaml.load(raw_info)
def _ReadSsconfInstanceList():
    """Reads ssconf_instance_list from the master node.

    @return: the lines of the file, as a list

    """
    master = qa_config.GetMasterNode()

    file_name = "ssconf_%s" % constants.SS_INSTANCE_LIST
    ssconf_path = utils.PathJoin(pathutils.DATA_DIR, file_name)
    node_path = qa_utils.MakeNodePath(master, ssconf_path)

    cat_cmd = utils.ShellQuoteArgs(["cat", node_path])
    content = qa_utils.GetCommandOutput(master.primary, cat_cmd)
    return content.splitlines()
def _InstanceRunning(name):
    """Checks whether an instance is running.

    The status column of C{gnt-instance list} is piped through C{grep}
    on the master node; the exit code of that pipeline decides.

    @param name: full name of the instance
    @return: True if the pipeline exits with code 0, False otherwise

    """
    master = qa_config.GetMasterNode()

    list_cmd = utils.ShellQuoteArgs(
        ["gnt-instance", "list", "-o", "status", name])
    pipeline = list_cmd + ' | grep running'

    exit_code = StartSSH(master.primary, pipeline).wait()
    return exit_code == 0
def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes):
    """gnt-cluster verify-disks with broken DRBD

    The DRBD devices of the instance are broken on its secondary node,
    alternating between taking them down and detaching them. The first
    verify-disks run has to activate the disks again, the second one must
    not. The watcher is paused while the test runs.

    @param instance: the instance whose disks get broken
    @param inst_nodes: nodes of the instance; the second entry is used as
        the secondary node
    @raise qa_error.Error: if verify-disks does not behave as described

    """
    # Both the old and the new drbdsetup syntax are tried; failures are
    # ignored on purpose
    down_template = ("(drbdsetup %d down >/dev/null 2>&1;"
                     " drbdsetup down resource%d >/dev/null 2>&1)"
                     " || /bin/true")
    detach_template = ("(drbdsetup %d detach >/dev/null 2>&1;"
                       " drbdsetup detach %d >/dev/null 2>&1)"
                       " || /bin/true")

    def _run_verify_disks():
        return GetCommandOutput(qa_config.GetMasterNode().primary,
                                "gnt-cluster verify-disks")

    qa_daemon.TestPauseWatcher()
    try:
        info = qa_instance.GetInstanceInfo(instance.name)
        snode = inst_nodes[1]

        minors = info["drbd-minors"][snode.primary]
        for idx, minor in enumerate(minors):
            # Even positions are taken down, odd ones detached
            template = detach_template if idx % 2 else down_template
            AssertCommand(template % (minor, minor), node=snode)

        activation_msg = \
            "Activating disks for instance '%s'" % instance.name

        verify_output = _run_verify_disks()
        if activation_msg not in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks did not activate broken"
                " DRBD disks:\n%s" % verify_output)

        verify_output = _run_verify_disks()
        if activation_msg in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks wants to activate broken"
                " DRBD disks on second attempt:\n%s" % verify_output)

        AssertCommand(_CLUSTER_VERIFY)
    finally:
        qa_daemon.TestResumeWatcher()
def _List(listcmd, fields, names):
    """Runs a list command.

    @param listcmd: the command whose C{list} sub-command is run, on the
        master node
    @param fields: names of the output fields to request
    @param names: names to restrict the listing to; nothing is appended to
        the command if empty or None
    @return: the output lines, fields separated by C{|}

    """
    master = qa_config.GetMasterNode()

    cmd = [
        listcmd,
        "list",
        "--separator=|",
        "--no-headers",
        "--output",
        ",".join(fields),
    ]
    cmd.extend(names or [])

    output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
    return output.splitlines()
def GetOutputFromMaster(cmd, use_multiplexer=True, log_cmd=True):
    """Gets the output of a command executed on master.

    @param cmd: either a command string, or a list of arguments which is
        shell-quoted first
    @param use_multiplexer: passed on to L{GetCommandOutput}
    @param log_cmd: passed on to L{GetCommandOutput}
    @return: the output of the command, with stderr redirected to stdout

    """
    if not isinstance(cmd, basestring):
        cmd = utils.ShellQuoteArgs(cmd)

    # Necessary due to the stderr stream not being captured properly on the
    # buildbot
    redirected_cmd = cmd + " 2>&1"

    master = qa_config.GetMasterNode()
    return GetCommandOutput(master.primary, redirected_cmd,
                            use_multiplexer=use_multiplexer,
                            log_cmd=log_cmd)
def TestClusterMasterFailover():
    """gnt-cluster master-failover

    Fails over to another node and then back to the original master; after
    each failover the node list is requested from the new master.

    """
    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    failover_cmd = ["gnt-cluster", "master-failover"]
    node_list_cmd = ["gnt-node", "list"]

    try:
        # First to the acquired node, then back to the original master node
        for new_master in (failovermaster, master):
            AssertCommand(failover_cmd, node=new_master)
            AssertCommand(node_list_cmd, node=new_master)
    finally:
        failovermaster.Release()
def _GetInstanceField(instance, field):
    """Get the value of a field of an instance.

    The value is taken from C{gnt-instance list}, run with C{--units m},
    with surrounding whitespace removed.

    @type instance: string
    @param instance: Instance name
    @type field: string
    @param field: Name of the field
    @rtype: string

    """
    master = qa_config.GetMasterNode()

    list_args = [
        "gnt-instance", "list",
        "--no-headers",
        "--units", "m",
        "-o", field,
        instance,
    ]
    output = qa_utils.GetCommandOutput(master.primary,
                                       utils.ShellQuoteArgs(list_args))
    return output.strip()
def _CreateOobScriptStructure():
    """Create a simple OOB handling script and its structure.

    Three empty files are uploaded to the master node. The script records
    its arguments in the first one, prints the content of the second one
    and exits with the code read from the third one.

    @return: list of the paths of the script, the arguments file, the data
        file and the exit code file, in this order

    """
    master = qa_config.GetMasterNode()

    data_path = qa_utils.UploadData(master.primary, "")
    verify_path = qa_utils.UploadData(master.primary, "")
    exit_code_path = qa_utils.UploadData(master.primary, "")

    script_template = ("#!/bin/bash\n"
                       "echo \"$@\" > %s\n"
                       "cat %s\n"
                       "exit $(< %s)\n")
    quoted_paths = (utils.ShellQuote(verify_path),
                    utils.ShellQuote(data_path),
                    utils.ShellQuote(exit_code_path))
    oob_script = script_template % quoted_paths

    oob_path = qa_utils.UploadData(master.primary, oob_script, mode=0o700)

    return [oob_path, verify_path, data_path, exit_code_path]
def TestInstanceReboot(instance):
    """gnt-instance reboot

    Reboots the instance once per configured reboot type, then shuts it
    down and checks that a reboot brings it back to the running state.

    @param instance: the instance to reboot

    """
    options = qa_config.get("options", {})
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)
    name = instance.name

    for rtype in reboot_types:
        type_option = "--type=%s" % rtype
        AssertCommand(["gnt-instance", "reboot", type_option, name])

    # Rebooting a stopped instance has to start it
    AssertCommand(["gnt-instance", "shutdown", name])
    qa_utils.RunInstanceCheck(instance, False)
    AssertCommand(["gnt-instance", "reboot", name])

    master = qa_config.GetMasterNode()
    status_cmd = utils.ShellQuoteArgs(
        ["gnt-instance", "list", "--no-headers", "-o", "status", name])
    status = qa_utils.GetCommandOutput(master.primary, status_cmd)
    AssertEqual(status.strip(), constants.INSTST_RUNNING)