def IsExclusiveStorageInstanceTestEnabled():
  """Tells whether the exclusive-storage instance tests can be run.

  If the tests are enabled in the QA configuration, every configured node is
  queried with C{vgs} for the number of physical volumes of the configured
  volume group; each node must report at least two.

  @rtype: boolean
  @return: True if the tests are enabled and all nodes passed the check,
      False if the tests are not enabled
  @raise qa_error.Error: if the number of PVs of a node cannot be determined
      or is lower than two

  """
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  vgscmd = utils.ShellQuoteArgs([
    "vgs",
    "--noheadings",
    "-o", "pv_count",
    vgname,
    ])

  for node in qa_config.GetConfig()["nodes"]:
    try:
      pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
    except Exception as e:
      # Whatever went wrong (running the command or parsing its output) is
      # reported as a QA error naming the node and the test
      msg = ("Cannot get the number of PVs on %s, needed by '%s': %s" %
             (node.primary, test_name, e))
      raise qa_error.Error(msg)
    if pvnum < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pvnum, test_name))

  # The result used to be stored in a local variable and never returned
  return True
def NodeAdd(node, readd=False, group=None):
  """Adds or re-adds a node to the cluster using C{gnt-node add}.

  @param node: node object; its C{added}, C{primary} and C{secondary}
      attributes are read
  @type readd: boolean
  @param readd: whether to pass C{--readd}; the node must then already be
      marked as added, otherwise it must not be
  @param group: if not None, passed to the command via C{--node-group}
  @raise qa_error.Error: if the C{added} state of the node does not fit the
      requested operation

  """
  already_added = node.added
  if readd:
    if not already_added:
      raise qa_error.Error("Node %s not yet in cluster" % node.primary)
  elif already_added:
    raise qa_error.Error("Node %s already in cluster" % node.primary)

  optional_args = []
  if node.secondary:
    optional_args.append("--secondary-ip=%s" % node.secondary)
  if readd:
    optional_args.append("--readd")
  if group is not None:
    optional_args += ["--node-group", group]
  if not qa_config.GetModifySshSetup():
    optional_args.append("--no-node-setup")

  # The node name always comes last on the command line
  AssertCommand(["gnt-node", "add", "--no-ssh-key-check"] +
                optional_args + [node.primary])

  if not readd:
    node.MarkAdded()
  else:
    assert node.added
def _RetrieveTerminationInfo(job_id):
  """Retrieves the termination info from a job caused by gnt-debug delay.

  Only the execution log of the first opcode of the job is examined.

  @param job_id: id of the job, handed to C{gnt-job info}
  @rtype: dict or None
  @return: The termination log entry, or None if no entry was found
  @raise qa_error.Error: if the job lists no opcodes, or if more than one
      termination entry is present

  """
  job_info = GetObjectInfo(["gnt-job", "info", str(job_id)])

  opcodes = job_info[0]["Opcodes"]
  if not opcodes:
    raise qa_error.Error("Cannot retrieve a list of opcodes")

  execution_logs = opcodes[0]["Execution log"]
  if not execution_logs:
    return None

  # Build a real list: the number of matches is checked below, which a lazy
  # filter object would not support
  termination_logs = [entry for entry in execution_logs
                      if entry["Content"][1] == constants.ELOG_DELAY_TEST]

  if len(termination_logs) > 1:
    raise qa_error.Error("Too many interruption information entries found!")

  if termination_logs:
    return termination_logs[0]
  return None
def _AssertRetCode(rcode, fail, cmdstr, nodename):
  """Compares the exit code of a command with the expected outcome.

  @param rcode: exit code of the command
  @param fail: whether the command was expected to fail
  @param cmdstr: the command, only used in the error message
  @param nodename: name of the node, only used in the error message
  @raise qa_error.Error: if the command succeeded although it was expected
      to fail, or failed although it was expected to succeed

  """
  succeeded = (rcode == 0)

  if fail:
    if succeeded:
      raise qa_error.Error("Command '%s' on node %s was expected to fail but"
                           " didn't" % (cmdstr, nodename))
  elif not succeeded:
    raise qa_error.Error("Command '%s' on node %s failed, exit code %s" %
                         (cmdstr, nodename, rcode))
def TestJobCancellation():
  """gnt-job cancel"""
  # The first delay has to be long enough for the submission of the second
  # job and its cancellation to complete before the first job is done. The
  # second delay is kept short so that not much time is spent waiting should
  # the cancellation fail and the job run after all.
  FIRST_COMMAND_DELAY = 10.0
  SECOND_COMMAND_DELAY = 3.0
  # Arbitrary factor for the final wait; a higher value could prevent
  # flakiness
  WAIT_MULTIPLIER = 4.0

  AssertCommand(["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)])

  master = qa_config.GetMasterNode()

  # Forcing tty usage does not work on buildbot, hence all output of the
  # command is redirected to stdout
  submit_cmd = "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY
  submit_output = GetCommandOutput(master.primary, submit_cmd)

  found_ids = re.findall("JobID: ([0-9]+)", submit_output)
  if len(found_ids) != 1:
    raise qa_error.Error("Cannot parse gnt-debug delay output to find job id")
  (job_id, ) = found_ids

  AssertCommand(["gnt-job", "cancel", job_id])

  # Watching the second job is expected to fail because it was cancelled
  AssertCommand(["gnt-job", "watch", job_id], fail=True)

  job_status = _GetJobStatus(job_id)
  if job_status == constants.JOB_STATUS_CANCELED:
    return

  if job_status == constants.JOB_STATUS_CANCELING:
    # The cancellation is still in progress: poll until the status changes
    # or the timeout is hit
    poll_status = functools.partial(_RetryingFetchJobStatus,
                                    constants.JOB_STATUS_CANCELING, job_id)
    try:
      job_status = retry.Retry(poll_status, 2.0,
                               WAIT_MULTIPLIER * FIRST_COMMAND_DELAY)
    except retry.RetryTimeout:
      # Keep the last known status, it is reported below
      pass

  if job_status != constants.JOB_STATUS_CANCELED:
    raise qa_error.Error("Job was not successfully cancelled, status "
                         "found: %s" % job_status)
def Validate(self):
  """Validates loaded configuration data.

  The checks cover, in this order: mandatory top-level options, disk
  options, the instance check script, the enabled hypervisors and the
  virtual cluster settings. The first failing check aborts the validation.

  @raise qa_error.Error: if any of the checks fails

  """
  mandatory = [
    ("name", "Cluster name is required"),
    ("nodes", "Need at least one node"),
    ("instances", "Need at least one instance"),
    ]
  for (key, error_text) in mandatory:
    if not self.get(key):
      raise qa_error.Error(error_text)

  disks = self.GetDiskOptions()
  if disks is None:
    raise qa_error.Error("Config option 'disks' must exist")
  if any(d.get("size") is None or d.get("growth") is None for d in disks):
    raise qa_error.Error("Config options `size` and `growth` must exist"
                         " for all `disks` items")

  check = self.GetInstanceCheckScript()
  if check:
    # The script only has to be reachable via stat(2)
    try:
      os.stat(check)
    except EnvironmentError as err:
      raise qa_error.Error("Can't find instance check script '%s': %s" %
                           (check, err))

  enabled_hv = frozenset(self.GetEnabledHypervisors())
  if not enabled_hv:
    raise qa_error.Error("No hypervisor is enabled")

  unknown_hv = enabled_hv - constants.HYPER_TYPES
  if unknown_hv:
    raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
                         utils.CommaJoin(unknown_hv))

  (vc_master, vc_basedir) = self.GetVclusterSettings()

  # Either both virtual cluster options are set or neither of them
  if bool(vc_master) != bool(vc_basedir):
    raise qa_error.Error("All or none of the config options '%s' and '%s'"
                         " must be set" %
                         (_VCLUSTER_MASTER_KEY, _VCLUSTER_BASEDIR_KEY))

  if vc_basedir and not utils.IsNormAbsPath(vc_basedir):
    raise qa_error.Error("Path given in option '%s' must be absolute and"
                         " normalized" % _VCLUSTER_BASEDIR_KEY)
def TestLiveRepair():
  """Test execution of a repair command upon a 'live-repair' diagnosis.

  A diagnose command reporting the status C{live-repair} together with a
  random number as details is uploaded to a non-master node, along with a
  repair command that writes the received details to a file in C{/tmp}.
  The test passes if that file contains the random number afterwards.

  @raise qa_error.Error: if the file does not hold the expected content

  """
  _SetUp('live-repair')
  n = random.randint(10000, 99999)
  node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    UploadData(node.primary,
               'echo \'' + serializer.DumpJson({
                 "status": "live-repair",
                 "command": "repair",
                 "details": str(n)
               }).strip() + '\'',
               0o755,
               '/etc/ganeti/node-diagnose-commands/live-repair')
    UploadData(node.primary,
               """#!/usr/bin/python

import sys
import json

n = json.loads(sys.stdin.read())['details']
with open('/tmp/' + n, 'w') as f:
  f.write(n)
print 'file written'
""",
               0o755,
               '/etc/ganeti/node-repair-commands/repair')
    _AssertRepairCommand()
    tag = _AssertRepairTagAddition(node)
    # The repair command must have written the number into a file named
    # after it
    if str(n) != AssertCommand(["cat", "/tmp/" + str(n)], node=node)[1]:
      raise qa_error.Error('Repair command was unsuccessful')
  finally:
    # Hand the node back even if one of the steps above failed
    node.Release()
  _TearDown(node, tag, [
    '/etc/ganeti/node-diagnose-commands/live-repair',
    '/etc/ganeti/node-repair-commands/repair'
  ], False)
def _GetBlockingLocks():
  """Finds out which locks are blocking jobs by invoking "gnt-debug locks".

  The first line of the output is treated as the header and skipped; every
  following line must consist of exactly four whitespace-separated fields.

  @rtype: list of string
  @return: The names of the locks currently blocking any job.
  @raise qa_error.Error: if a line does not have four fields

  """
  # Due to mysterious issues when a SSH multiplexer is being used by two
  # threads, it is turned off here; most of the logging is suppressed as well
  # to improve the visibility of the other thread's output
  locks_output = GetOutputFromMaster("gnt-debug locks",
                                     use_multiplexer=False,
                                     log_cmd=False)

  blocking_locks = []
  for lock_line in locks_output.splitlines()[1:]:
    fields = lock_line.split()
    if len(fields) != 4:
      raise qa_error.Error("Error while parsing gnt-debug locks output, "
                           "line at fault is: %s" % lock_line)

    (lock_name, _, _, pending_jobs) = fields
    # A dash in the last column means that no job is pending on the lock
    if pending_jobs == '-':
      continue
    blocking_locks.append(lock_name)

  return blocking_locks
def TestNodeAddAll():
  """Adding all nodes to cluster.

  Once all non-master nodes have been added, the process list of every node
  is checked for the daemons expected there.

  @raise qa_error.Error: for the first expected daemon whose name does not
      show up in the process list of a node

  """
  master = qa_config.GetMasterNode()
  for node in qa_config.get("nodes"):
    if node != master:
      NodeAdd(node, readd=False)

  ps_cmd = utils.ShellQuoteArgs(["ps", "-Ao", "comm"])

  for node in qa_config.get("nodes"):
    process_list = AssertCommand(ps_cmd, node=node)[1]

    expected = ['ganeti-noded']
    if constants.ENABLE_MOND:
      expected.append('ganeti-mond')
    if node == master:
      expected.extend([
        'ganeti-wconfd',
        'ganeti-rapi',
        'ganeti-luxid',
        'ganeti-maintd',
        ])

    # Plain substring search in the output of ps
    missing = [daemon for daemon in expected if daemon not in process_list]
    if missing:
      raise qa_error.Error(missing[0] +
                           ' is not running at %s' % node.primary)
def _RetrieveSecret(instance, pnode):
  """Retrieves the DRBD secret given an instance object and the primary node.

  @type instance: L{qa_config._QaInstance}
  @type pnode: L{qa_config._QaNode}
  @rtype: string
  @raise qa_error.Error: if the output of C{drbdsetup} does not match
      L{_DRBD_SECRET_RE}

  """
  info = GetInstanceInfo(instance.name)

  # Only the first disk on the primary node is of interest
  first_minor = info["drbd-minors"][pnode.primary][0]

  # Both argument orders are tried so that this works for all DRBD versions
  show_cmd = ("drbdsetup show %d; drbdsetup %d show || true" %
              (first_minor, first_minor))
  drbd_output = qa_utils.GetCommandOutput(pnode.primary, show_cmd)

  secret_match = _DRBD_SECRET_RE.search(drbd_output)
  if secret_match is not None:
    return secret_match.groups(0)[0]

  raise qa_error.Error("Could not retrieve DRBD secret for instance %s from"
                       " node %s." % (instance.name, pnode.primary))
def _StartDelayFunction(locks, timeout):
  """Starts the gnt-debug delay option with the given locks and timeout.

  @param locks: mapping from locking level to names; only the entries for
      L{locking.LEVEL_NODE} are used, each passed as a C{-n} option
  @param timeout: delay handed to the command
  @return: the socket path found in the termination log entry of the job
  @raise qa_error.Error: if no termination info could be retrieved

  """
  node_args = ["-n%s" % node for node in locks.get(locking.LEVEL_NODE, [])]

  # The interruptible switch (-i) must be used
  cmd = (["gnt-debug", "delay", "-i", "--submit", "--no-master"] +
         node_args + [str(timeout)])

  job_id = ExecuteJobProducingCommand(cmd)

  # Retries until a non-empty result is returned from the function
  log_entry = retry.SimpleRetry(lambda result: result,
                                _RetrieveTerminationInfo, 2.0, 10.0,
                                args=[job_id])
  if not log_entry:
    raise qa_error.Error("Failure when trying to retrieve delay termination "
                         "information")

  content = log_entry["Content"]
  (_, _, (socket_path, )) = content
  return socket_path
def GetDiskOptions(self):
  """Return options for the disks of the instances.

  The 'disks' parameter of the configuration data is returned if present.
  Otherwise the list is built from the legacy 'disk' and 'disk-growth'
  parameters, pairing their items by position.

  @return: the list of disk options, or None if neither the new nor the
      legacy parameters are set
  @raise qa_error.Error: if only one of the legacy parameters is given or
      their lengths differ

  """
  try:
    return self._data["disks"]
  except KeyError:
    pass

  # Legacy interface
  sizes = self._data.get("disk")
  growths = self._data.get("disk-growth")

  if not (sizes or growths):
    return None

  consistent = (sizes is not None and growths is not None and
                len(sizes) == len(growths))
  if not consistent:
    raise qa_error.Error("Config options 'disk' and 'disk-growth' must"
                         " exist and have the same number of items")

  return [{"size": size, "growth": growth}
          for (size, growth) in zip(sizes, growths)]
def Load(cls, filename):
  """Loads a configuration file and produces a configuration object.

  The loaded object is validated before it is returned.

  @type filename: string
  @param filename: Path to configuration file
  @rtype: L{_QaConfig}
  @raise qa_error.Error: if a non-empty patch is present but the
      C{jsonpatch} module cannot be imported

  """
  data = serializer.LoadJson(utils.ReadFile(filename))

  # Patch the document using JSON Patch (RFC6902) in file _PATCH_JSON, if
  # available
  try:
    patch = serializer.LoadJson(utils.ReadFile(_PATCH_JSON))
    if patch:
      jsonpatch = __import__("jsonpatch", fromlist=[])
      data = jsonpatch.apply_patch(data, patch)
  except IOError:
    # An I/O error here leaves the configuration unpatched
    pass
  except ImportError:
    raise qa_error.Error("If you want to use the QA JSON patching feature,"
                         " you need to install Python modules"
                         " 'jsonpatch' and 'jsonpointer'.")

  converted = dict(_ConvertResources(item) for item in data.items())
  result = cls(converted)  # pylint: disable=E1103
  result.Validate()

  return result
def Load(cls, filename):
  """Loads a configuration file and produces a configuration object.

  The loaded object is validated before it is returned.

  @type filename: string
  @param filename: Path to configuration file
  @rtype: L{_QaConfig}
  @raise qa_error.Error: if a non-empty patch is present but the
      C{jsonpatch} module cannot be imported

  """
  data = serializer.LoadJson(utils.ReadFile(filename))

  # Patch the document using JSON Patch (RFC6902), if patches are available
  try:
    patches = _QaConfig.LoadPatches()
    # The module is only needed if at least one patch is non-empty
    have_patches = any(patches.values())
    if have_patches:
      jsonpatch = __import__("jsonpatch", fromlist=[])
      _QaConfig.ApplyPatches(data, jsonpatch, patches)
  except IOError:
    # An I/O error here leaves the configuration as loaded so far
    pass
  except ImportError:
    raise qa_error.Error("For the QA JSON patching feature to work, you "
                         "need to install Python modules 'jsonpatch' and "
                         "'jsonpointer'.")

  converted = dict(_ConvertResources(item) for item in data.items())
  result = cls(converted)  # pylint: disable=E1103
  result.Validate()

  return result
def GetGenericAddParameters(inst, disk_template, force_mac=None):
  """Builds command line parameters common to instance creation commands.

  @param inst: instance object, asked for the MAC address of its first NIC
      unless C{force_mac} is given
  @param disk_template: disk template; no C{--disk} options are generated
      for L{constants.DT_DISKLESS}
  @param force_mac: if set, used as MAC address of the first NIC
  @rtype: list of string
  @return: the parameters (C{-B}, C{--disk} and C{--net} options)
  @raise qa_error.Error: if spindles are supported but a disk does not
      specify them

  """
  memory_spec = "%s=%s,%s=%s" % (constants.BE_MINMEM,
                                 qa_config.get(constants.BE_MINMEM),
                                 constants.BE_MAXMEM,
                                 qa_config.get(constants.BE_MAXMEM))
  params = ["-B", memory_spec]

  if disk_template != constants.DT_DISKLESS:
    for idx, disk in enumerate(qa_config.GetDiskOptions()):
      diskparams = "%s:size=%s" % (idx, disk.get("size"))

      name = disk.get("name")
      if name:
        diskparams += ",name=%s" % name

      if qa_config.AreSpindlesSupported():
        spindles = disk.get("spindles")
        if spindles is None:
          raise qa_error.Error("'spindles' is a required parameter for disks"
                               " when you enable exclusive storage tests")
        diskparams += ",spindles=%s" % spindles

      params.extend(["--disk", diskparams])

  # Set static MAC address if configured; an explicitly forced one wins
  nic0_mac = force_mac or inst.GetNicMacAddr(0, None)
  if nic0_mac:
    params.extend(["--net", "0:mac=%s" % nic0_mac])

  return params
def TestInstanceConsecutiveFailures(instance):
  """Test five consecutive instance failures.

  The instance is shut down six times in a row, running the watcher after
  each shutdown: it has to be running again after each of the first five
  runs and has to stay down after the sixth one.

  @param instance: instance object, its C{name} attribute is used
  @raise qa_error.Error: if the instance state differs from the expected one

  """
  inst_name = qa_utils.ResolveInstanceName(instance.name)
  inst_was_running = bool(_InstanceRunning(inst_name))

  _ResetWatcherDaemon()

  expected_restarts = 5
  for attempt in range(expected_restarts + 1):
    should_start = attempt < expected_restarts

    _ShutdownInstance(inst_name)
    RunWatcherDaemon()
    time.sleep(5)

    is_running = bool(_InstanceRunning(inst_name))
    if is_running == should_start:
      continue

    raise qa_error.Error("Instance not started when it should"
                         if should_start else
                         "Instance started when it shouldn't")

  AssertCommand(["gnt-instance", "info", inst_name])

  # Bring the instance back to the state it was found in
  if inst_was_running:
    _StartInstance(inst_name)
def TestClusterBurnin():
  """Burnin

  Acquires the configured number of instances (continuing with fewer if not
  enough are available), uploads the burnin tool to the master node and runs
  it with options derived from the QA configuration. The uploaded script is
  removed and the instances are released in any case.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      num = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(0, num):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master.primary, "../tools/burnin")
    try:
      disks = qa_config.GetDiskOptions()
      disk_sizes = ",".join([d.get("size") for d in disks])
      disk_growths = ",".join([d.get("growth") for d in disks])

      # Run burnin
      cmd = [
        "env",
        "PYTHONPATH=%s" % _constants.VERSIONEDSHAREDIR,
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % disk_sizes,
        "--disk-growth=%s" % disk_growths,
        "--disk-template=%s" % disk_template,
        ]
      if parallel:
        cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        cmd.append("--no-reboot")
      cmd.extend(inst.name for inst in instances)

      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      inst.Release()
def fn():
  """Returns the single tag reported by L{_GetMaintTags} for C{node}.

  C{node} is not a parameter; it is looked up in the enclosing scope.

  @return: the only tag in the list
  @raise retry.RetryAgain: if no tag is present
  @raise qa_error.Error: if more than one tag is present

  """
  tags = _GetMaintTags(node)
  tag_count = len(tags)

  if tag_count == 0:
    raise retry.RetryAgain()
  if tag_count == 1:
    return tags[0]

  raise qa_error.Error("Only one tag should be added")
def TestEmptyCluster():
  """Testing remote API on an empty cluster.

  A set of GET requests is issued through L{_DoTests}, some of them with a
  verification callback or an expected value. Afterwards a non-existent
  resource is requested with several HTTP methods, each of which has to
  fail with HTTP 404.

  @raise qa_error.Error: if a request for the non-existent resource does
      not raise an API error

  """
  master = qa_config.GetMasterNode()
  master_full = qa_utils.ResolveNodeName(master)

  def _VerifyInfo(data):
    AssertIn("name", data)
    AssertIn("master", data)
    AssertEqual(data["master"], master_full)

  def _VerifyNodes(data):
    master_entry = {
      "id": master_full,
      "uri": "/2/nodes/%s" % master_full,
      }
    AssertIn(master_entry, data)

  def _VerifyNodesBulk(data):
    for node in data:
      for entry in NODE_FIELDS:
        AssertIn(entry, node)

  def _VerifyGroups(data):
    default_group = {
      "name": constants.INITIAL_NODE_GROUP_NAME,
      "uri": "/2/groups/" + constants.INITIAL_NODE_GROUP_NAME,
      }
    AssertIn(default_group, data)

  def _VerifyGroupsBulk(data):
    for group in data:
      for field in GROUP_FIELDS:
        AssertIn(field, group)

  _DoTests([
    ("/", None, "GET", None),
    ("/2/info", _VerifyInfo, "GET", None),
    ("/2/tags", None, "GET", None),
    ("/2/nodes", _VerifyNodes, "GET", None),
    ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
    ("/2/groups", _VerifyGroups, "GET", None),
    ("/2/groups?bulk=1", _VerifyGroupsBulk, "GET", None),
    ("/2/instances", [], "GET", None),
    ("/2/instances?bulk=1", [], "GET", None),
    ("/2/os", None, "GET", None),
    ])

  # Test HTTP Not Found
  for method in ["GET", "PUT", "POST", "DELETE"]:
    try:
      _DoTests([("/99/resource/not/here/99", None, method, None)])
    except rapi.client.GanetiApiError as err:
      AssertEqual(err.code, 404)
    else:
      raise qa_error.Error("Non-existent resource didn't return HTTP 404")
def _RaiseWithInfo(msg, error_desc):
  """Raises a QA error with the given content, and adds a message if present.

  @param msg: optional message; if it is true-ish, it is put in front of
      the description, separated by a colon
  @param error_desc: description of the error
  @raise qa_error.Error: always

  """
  output = ("%s: %s" % (msg, error_desc)) if msg else error_desc
  raise qa_error.Error(output)
def TestRapiStoppedInstanceConsole(instance):
  """Test getting stopped instance's console information via RAPI

  @param instance: instance object, its C{name} attribute is used
  @raise qa_error.Error: if the request does not raise an API error

  """
  try:
    _rapi_client.GetInstanceConsole(instance.name)
  except rapi.client.GanetiApiError as err:
    # The expected outcome: the request is refused with HTTP 503
    AssertEqual(err.code, 503)
    return

  raise qa_error.Error("Getting console for stopped instance didn't"
                       " return HTTP 503")
def AssertCommand(cmd, fail=False, node=None, log_cmd=True,
                  forward_agent=True, max_seconds=None):
  """Runs a command via SSH and checks its outcome.

  @param cmd: either a string (the command to execute) or a list (to be
      converted using L{utils.ShellQuoteArgs} into a string)
  @type fail: boolean or None
  @param fail: whether the command is expected to fail instead of
      succeeding; None disables the check of the exit code
  @param node: if passed, the node on which the command is executed instead
      of the master node; its name is resolved through L{_GetName}
  @param log_cmd: if False, neither the command (this flag is passed on to
      L{StartSSH}) nor its output are logged
  @type forward_agent: boolean
  @param forward_agent: whether to forward the agent when starting the SSH
      session or not, sometimes useful for crypto-related operations which
      can use a key they should not
  @type max_seconds: double
  @param max_seconds: fail if the command takes more than C{max_seconds}
      seconds
  @return: the return code, stdout and stderr of the command
  @raise qa_error.Error: if the command fails when it shouldn't or vice
      versa, or if it exceeds C{max_seconds}

  """
  if node is None:
    node = qa_config.GetMasterNode()
  nodename = _GetName(node, operator.attrgetter("primary"))

  cmdstr = cmd if isinstance(cmd, basestring) else utils.ShellQuoteArgs(cmd)

  # Run the command, measuring how long it takes
  started_at = datetime.datetime.now()
  popen = StartSSH(nodename, cmdstr, log_cmd=log_cmd,
                   forward_agent=forward_agent)
  (stdout, stderr) = popen.communicate()
  rcode = popen.returncode
  elapsed = datetime.datetime.now() - started_at
  duration_seconds = TimedeltaToTotalSeconds(elapsed)

  try:
    if fail is not None:
      _AssertRetCode(rcode, fail, cmdstr, nodename)
  finally:
    # The output is shown even if the exit code check failed
    if log_cmd:
      _PrintCommandOutput(stdout, stderr)

  if max_seconds is not None and duration_seconds > max_seconds:
    raise qa_error.Error(
      "Cmd '%s' took %f seconds, maximum of %f was exceeded" %
      (cmdstr, duration_seconds, max_seconds))

  return rcode, stdout, stderr
def TestRapiInstanceMultiAlloc(node):
  """Test adding two new instances via the RAPI instance-multi-alloc method

  @param node: node handed to L{_GenInstanceAllocationDict} for each of the
      instances
  @return: tuple with the two acquired instances, or None if the plain disk
      template is not supported
  @raise qa_error.Error: if the job result lacks information about the
      created jobs, lists an unexpected number of them, or reports a failed
      instance creation

  """
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    return

  JOBS_KEY = "jobs"

  instance_list = []
  try:
    # The instances are acquired inside the try block so that a failure to
    # acquire the second one still releases the first one
    instance_one = qa_config.AcquireInstance()
    instance_list.append(instance_one)
    instance_two = qa_config.AcquireInstance()
    instance_list.append(instance_two)

    rapi_dicts = [_GenInstanceAllocationDict(node, i) for i in instance_list]

    job_id = _rapi_client.InstancesMultiAlloc(rapi_dicts)

    results, = _WaitForRapiJob(job_id)

    if JOBS_KEY not in results:
      raise qa_error.Error("RAPI instance-multi-alloc did not deliver "
                           "information about created jobs")

    if len(results[JOBS_KEY]) != len(instance_list):
      raise qa_error.Error("RAPI instance-multi-alloc failed to return the "
                           "desired number of jobs!")

    for success, job in results[JOBS_KEY]:
      if success:
        _WaitForRapiJob(job)
      else:
        raise qa_error.Error("Failed to create instance in "
                             "instance-multi-alloc call")
  except:
    # Catch-all on purpose: whatever went wrong, the instances are released
    # and the exception is re-raised unchanged.
    # Note that although released, it may be that some of the instance
    # creations have in fact succeeded. Handling this in a better way may be
    # possible, but is not necessary as the QA has already failed at this
    # point.
    for instance in instance_list:
      instance.Release()
    raise

  return (instance_one, instance_two)
def _StartInstance(name):
  """Starts an instance and verifies that it is running afterwards.

  @param name: full name of the instance
  @raise qa_error.Error: if the instance is not reported as running after
      the start command

  """
  AssertCommand(["gnt-instance", "start", name])

  running = bool(_InstanceRunning(name))
  if not running:
    raise qa_error.Error("instance start failed")
def _ShutdownInstance(name):
  """Shuts down an instance without recording state and verifies the result.

  @param name: full name of the instance
  @raise qa_error.Error: if the instance is still reported as running after
      the shutdown command

  """
  AssertCommand(["gnt-instance", "shutdown", "--no-remember", name])

  still_running = _InstanceRunning(name)
  if still_running:
    raise qa_error.Error("instance shutdown failed")
def WaitForCompletion(self):
  """Wait for the completion of all registered jobs.

  Polls every two seconds until no job is pending any more.

  @raise qa_error.Error: if jobs are still registered once nothing is
      pending any more

  """
  while True:
    if not self._HasPendingJobs():
      break
    time.sleep(2)

  with self._lock:
    leftover_jobs = self._jobs
    if leftover_jobs:
      raise qa_error.Error("Jobs %s didn't finish in success state!" %
                           self._GetJobIds())
def Validate(self):
  """Validates loaded configuration data.

  The checks cover, in this order: the mandatory top-level options, the
  disk options and the instance check script. The first failing check
  aborts the validation.

  @raise qa_error.Error: if any of the checks fails

  """
  if not self.get("name"):
    raise qa_error.Error("Cluster name is required")

  if not self.get("nodes"):
    raise qa_error.Error("Need at least one node")

  if not self.get("instances"):
    raise qa_error.Error("Need at least one instance")

  disks = self.GetDiskOptions()
  if disks is None:
    raise qa_error.Error("Config option 'disks' must exist")
  else:
    for d in disks:
      if d.get("size") is None or d.get("growth") is None:
        raise qa_error.Error("Config options `size` and `growth` must exist"
                             " for all `disks` items")

  check = self.GetInstanceCheckScript()
  if check:
    # The script only has to be reachable via stat(2)
    try:
      os.stat(check)
    except EnvironmentError as err:
      raise qa_error.Error("Can't find instance check script '%s': %s" %
                           (check, err))
def ReloadCertificates(ensure_presence=True):
  """Reloads the client RAPI certificate with the one present on the node.

  If the QA is set up to use a specific certificate using the
  "rapi-files-location" parameter, it will be put in place prior to
  retrieving it.

  The certificate is stored in a temporary file referenced by the
  module-level C{_rapi_ca}. Unless a virtual cluster is used, a new RAPI
  client is stored in C{_rapi_client} and its protocol version is printed.

  @type ensure_presence: boolean
  @param ensure_presence: whether to call L{_EnsureRapiFilesPresence} first
  @raise qa_error.Error: if the RAPI username or password is not set

  """
  # pylint: disable=W0603
  # due to global usage
  global _rapi_ca
  global _rapi_client

  if ensure_presence:
    _EnsureRapiFilesPresence()

  if _rapi_username is None or _rapi_password is None:
    raise qa_error.Error("RAPI username and password have to be set before"
                         " attempting to reload a certificate.")

  master = qa_config.GetMasterNode()

  # Load RAPI certificate from master node
  cert_path = qa_utils.MakeNodePath(master, pathutils.RAPI_CERT_FILE)
  cmd = ["openssl", "x509", "-in", cert_path]

  # Write to temporary file
  _rapi_ca = tempfile.NamedTemporaryFile()
  cert_data = qa_utils.GetCommandOutput(master.primary,
                                        utils.ShellQuoteArgs(cmd))
  _rapi_ca.write(cert_data)
  _rapi_ca.flush()

  port = qa_config.get("rapi-port", default=constants.DEFAULT_RAPI_PORT)
  cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name,
                                           proxy="")

  if qa_config.UseVirtualCluster():
    # TODO: Implement full support for RAPI on virtual clusters
    print(qa_logging.FormatWarning("RAPI tests are not yet supported on"
                                   " virtual clusters and will be disabled"))

    assert _rapi_client is None
    return

  _rapi_client = rapi.client.GanetiRapiClient(master.primary, port=port,
                                              username=_rapi_username,
                                              password=_rapi_password,
                                              curl_config_fn=cfg_curl)

  print("RAPI protocol version: %s" % _rapi_client.GetVersion())
def _ReadRapiSecret(password_file_path):
  """Reads a RAPI secret stored locally.

  Only the first line of the file is used, with surrounding whitespace
  removed.

  @type password_file_path: string
  @param password_file_path: path of the file holding the secret
  @return: Login secret for the user
  @raise qa_error.Error: if the file cannot be opened or read

  """
  try:
    with open(password_file_path, 'r') as handle:
      first_line = handle.readline()
  except IOError:
    raise qa_error.Error("Could not open the RAPI password file located at"
                         " %s" % password_file_path)

  return first_line.strip()
def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes):
  """gnt-cluster verify-disks with broken DRBD

  With the watcher paused, the DRBD devices of the instance are broken on
  the node at index 1 of C{inst_nodes}. The first run of
  C{gnt-cluster verify-disks} has to report the activation of the instance's
  disks, a second run must not do so any more. The watcher is resumed in
  any case.

  @param instance: instance object, its C{name} attribute is used
  @param inst_nodes: nodes of the instance
  @raise qa_error.Error: if the output of either run is not as expected

  """
  def _RunVerifyDisks():
    return GetCommandOutput(qa_config.GetMasterNode().primary,
                            "gnt-cluster verify-disks")

  # Each template holds two forms of the drbdsetup invocation; failures of
  # the commands are ignored
  down_template = ("(drbdsetup %d down >/dev/null 2>&1;"
                   " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true")
  detach_template = ("(drbdsetup %d detach >/dev/null 2>&1;"
                     " drbdsetup detach %d >/dev/null 2>&1) || /bin/true")

  qa_daemon.TestPauseWatcher()

  try:
    info = qa_instance.GetInstanceInfo(instance.name)
    snode = inst_nodes[1]

    # Devices at even positions are taken down, the others are detached
    for idx, minor in enumerate(info["drbd-minors"][snode.primary]):
      template = detach_template if idx % 2 else down_template
      AssertCommand(template % (minor, minor), node=snode)

    activation_msg = "Activating disks for instance '%s'" % instance.name

    verify_output = _RunVerifyDisks()
    if activation_msg not in verify_output:
      raise qa_error.Error("gnt-cluster verify-disks did not activate broken"
                           " DRBD disks:\n%s" % verify_output)

    verify_output = _RunVerifyDisks()
    if activation_msg in verify_output:
      raise qa_error.Error("gnt-cluster verify-disks wants to activate broken"
                           " DRBD disks on second attempt:\n%s" %
                           verify_output)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    qa_daemon.TestResumeWatcher()