Example #1
def IsExclusiveStorageInstanceTestEnabled():
    test_name = "exclusive-storage-instance-tests"
    if qa_config.TestEnabled(test_name):
        vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
        vgscmd = utils.ShellQuoteArgs([
            "vgs",
            "--noheadings",
            "-o",
            "pv_count",
            vgname,
        ])
        nodes = qa_config.GetConfig()["nodes"]
        for node in nodes:
            try:
                pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
            except Exception as e:
                msg = (
                    "Cannot get the number of PVs on %s, needed by '%s': %s" %
                    (node.primary, test_name, e))
                raise qa_error.Error(msg)
            if pvnum < 2:
                raise qa_error.Error(
                    "Node %s has not enough PVs (%s) to run '%s'" %
                    (node.primary, pvnum, test_name))
        res = True
    else:
        res = False
    return res
Example #2
def NodeAdd(node, readd=False, group=None):
  if not readd and node.added:
    raise qa_error.Error("Node %s already in cluster" % node.primary)
  elif readd and not node.added:
    raise qa_error.Error("Node %s not yet in cluster" % node.primary)

  cmd = ["gnt-node", "add", "--no-ssh-key-check"]
  if node.secondary:
    cmd.append("--secondary-ip=%s" % node.secondary)
  if readd:
    cmd.append("--readd")
  if group is not None:
    cmd.extend(["--node-group", group])

  if not qa_config.GetModifySshSetup():
    cmd.append("--no-node-setup")

  cmd.append(node.primary)

  AssertCommand(cmd)

  if readd:
    assert node.added
  else:
    node.MarkAdded()
Example #3
def _RetrieveTerminationInfo(job_id):
    """ Retrieves the termination info from a job caused by gnt-debug delay.

  @rtype: dict or None
  @return: The termination log entry, or None if no entry was found

  """
    job_info = GetObjectInfo(["gnt-job", "info", str(job_id)])

    opcodes = job_info[0]["Opcodes"]
    if not opcodes:
        raise qa_error.Error("Cannot retrieve a list of opcodes")

    execution_logs = opcodes[0]["Execution log"]
    if not execution_logs:
        return None

    is_termination_info_fn = \
      lambda e: e["Content"][1] == constants.ELOG_DELAY_TEST

    filtered_logs = filter(is_termination_info_fn, execution_logs)

    no_logs = len(filtered_logs)
    if no_logs > 1:
        raise qa_error.Error(
            "Too many interruption information entries found!")
    elif no_logs == 1:
        return filtered_logs[0]
    else:
        return None
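
The helper above boils down to "filter the execution log for delay-test entries and require at most one match". A minimal, self-contained sketch of that pattern follows; the sample entries, the ELOG_DELAY_TEST value and the Error class are illustrative stand-ins, not Ganeti code.

class Error(Exception):  # stand-in for qa_error.Error (assumption)
    pass

ELOG_DELAY_TEST = "delay-test"  # assumed placeholder value

# Entries shaped like the ones inspected above: "Content" is (serial, type, payload).
execution_logs = [
    {"Content": (1, "message", "starting delay")},
    {"Content": (2, ELOG_DELAY_TEST, ("/var/run/ganeti/delay.sock",))},
]

matches = [e for e in execution_logs if e["Content"][1] == ELOG_DELAY_TEST]
if len(matches) > 1:
    raise Error("Too many interruption information entries found!")
termination_info = matches[0] if matches else None
print(termination_info)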
Example #4
def _AssertRetCode(rcode, fail, cmdstr, nodename):
    """Check the return value from a command and possibly raise an exception.

  """
    if fail and rcode == 0:
        raise qa_error.Error("Command '%s' on node %s was expected to fail but"
                             " didn't" % (cmdstr, nodename))
    elif not fail and rcode != 0:
        raise qa_error.Error("Command '%s' on node %s failed, exit code %s" %
                             (cmdstr, nodename, rcode))
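
A self-contained sketch of the same expected-failure check applied to a local command; Error and assert_ret_code are illustrative stand-ins for qa_error.Error and _AssertRetCode, and the commands are just /bin/true and /bin/false.

import subprocess

class Error(Exception):  # stand-in for qa_error.Error (assumption)
    pass

def assert_ret_code(rcode, fail, cmdstr, nodename):
    # Same decision logic as _AssertRetCode above.
    if fail and rcode == 0:
        raise Error("Command '%s' on node %s was expected to fail but"
                    " didn't" % (cmdstr, nodename))
    elif not fail and rcode != 0:
        raise Error("Command '%s' on node %s failed, exit code %s" %
                    (cmdstr, nodename, rcode))

assert_ret_code(subprocess.call(["true"]), False, "true", "localhost")
assert_ret_code(subprocess.call(["false"]), True, "false", "localhost")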
Example #5
def TestJobCancellation():
    """gnt-job cancel"""
    # The delay used for the first command should be large enough for the next
    # command and the cancellation command to complete before the first job is
    # done. The second delay should be small enough that not too much time is
    # spent waiting in the case of a failed cancel and a running command.
    FIRST_COMMAND_DELAY = 10.0
    AssertCommand(["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)])

    SECOND_COMMAND_DELAY = 3.0
    master = qa_config.GetMasterNode()

    # Forcing tty usage does not work on buildbot, so force all output of this
    # command to be redirected to stdout
    job_id_output = GetCommandOutput(
        master.primary,
        "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY)

    possible_job_ids = re.findall("JobID: ([0-9]+)", job_id_output)
    if len(possible_job_ids) != 1:
        raise qa_error.Error(
            "Cannot parse gnt-debug delay output to find job id")

    job_id = possible_job_ids[0]
    AssertCommand(["gnt-job", "cancel", job_id])

    # Now wait until the second job finishes, and expect the watch to fail due to
    # job cancellation
    AssertCommand(["gnt-job", "watch", job_id], fail=True)

    # Then check for job cancellation
    job_status = _GetJobStatus(job_id)
    if job_status != constants.JOB_STATUS_CANCELED:
        # Try and see if the job is being cancelled, and wait until the status
        # changes or we hit a timeout
        if job_status == constants.JOB_STATUS_CANCELING:
            retry_fn = functools.partial(_RetryingFetchJobStatus,
                                         constants.JOB_STATUS_CANCELING,
                                         job_id)
            try:
                # The multiplier to use is arbitrary, setting it higher could prevent
                # flakiness
                WAIT_MULTIPLIER = 4.0
                job_status = retry.Retry(retry_fn, 2.0,
                                         WAIT_MULTIPLIER * FIRST_COMMAND_DELAY)
            except retry.RetryTimeout:
                # The job status remains the same
                pass

        if job_status != constants.JOB_STATUS_CANCELED:
            raise qa_error.Error("Job was not successfully cancelled, status "
                                 "found: %s" % job_status)
Example #6
    def Validate(self):
        """Validates loaded configuration data.

    """
        if not self.get("name"):
            raise qa_error.Error("Cluster name is required")

        if not self.get("nodes"):
            raise qa_error.Error("Need at least one node")

        if not self.get("instances"):
            raise qa_error.Error("Need at least one instance")

        disks = self.GetDiskOptions()
        if disks is None:
            raise qa_error.Error("Config option 'disks' must exist")
        else:
            for d in disks:
                if d.get("size") is None or d.get("growth") is None:
                    raise qa_error.Error(
                        "Config options `size` and `growth` must exist"
                        " for all `disks` items")
        check = self.GetInstanceCheckScript()
        if check:
            try:
                os.stat(check)
            except EnvironmentError as err:
                raise qa_error.Error(
                    "Can't find instance check script '%s': %s" % (check, err))

        enabled_hv = frozenset(self.GetEnabledHypervisors())
        if not enabled_hv:
            raise qa_error.Error("No hypervisor is enabled")

        difference = enabled_hv - constants.HYPER_TYPES
        if difference:
            raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
                                 utils.CommaJoin(difference))

        (vc_master, vc_basedir) = self.GetVclusterSettings()
        if bool(vc_master) != bool(vc_basedir):
            raise qa_error.Error(
                "All or none of the config options '%s' and '%s'"
                " must be set" % (_VCLUSTER_MASTER_KEY, _VCLUSTER_BASEDIR_KEY))

        if vc_basedir and not utils.IsNormAbsPath(vc_basedir):
            raise qa_error.Error(
                "Path given in option '%s' must be absolute and"
                " normalized" % _VCLUSTER_BASEDIR_KEY)
Example #7
def TestLiveRepair():
    """Test node evacuate failover upon diagnosis.

  """
    _SetUp('live-repair')
    n = random.randint(10000, 99999)
    node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    UploadData(
        node.primary, 'echo \'' + serializer.DumpJson({
            "status": "live-repair",
            "command": "repair",
            "details": str(n)
        }).strip() + '\'', 0755,
        '/etc/ganeti/node-diagnose-commands/live-repair')
    UploadData(
        node.primary, """#!/usr/bin/python
import sys
import json

n = json.loads(sys.stdin.read())['details']
with open('/tmp/' + n, 'w') as f:
  f.write(n)
print 'file written'
""", 0755, '/etc/ganeti/node-repair-commands/repair')
    _AssertRepairCommand()
    tag = _AssertRepairTagAddition(node)
    if str(n) != AssertCommand(["cat", "/tmp/" + str(n)], node=node)[1]:
        raise qa_error.Error('Repair command was unsuccessful')
    node.Release()
    _TearDown(node, tag, [
        '/etc/ganeti/node-diagnose-commands/live-repair',
        '/etc/ganeti/node-repair-commands/repair'
    ], False)
Example #8
def _GetBlockingLocks():
    """ Finds out which locks are blocking jobs by invoking "gnt-debug locks".

  @rtype: list of string
  @return: The names of the locks currently blocking any job.

  """
    # Due to mysterious issues when a SSH multiplexer is being used by two
    # threads, we turn it off, and block most of the logging to improve the
    # visibility of the other thread's output
    locks_output = GetOutputFromMaster("gnt-debug locks",
                                       use_multiplexer=False,
                                       log_cmd=False)

    # The first non-empty line is the header, which we do not need
    lock_lines = locks_output.splitlines()[1:]

    blocking_locks = []
    for lock_line in lock_lines:
        components = lock_line.split()
        if len(components) != 4:
            raise qa_error.Error("Error while parsing gnt-debug locks output, "
                                 "line at fault is: %s" % lock_line)

        lock_name, _, _, pending_jobs = components

        if pending_jobs != '-':
            blocking_locks.append(lock_name)

    return blocking_locks
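
A self-contained sketch of the same parsing logic run against a made-up gnt-debug locks listing; the sample text and its column layout are assumptions for illustration only.

class Error(Exception):  # stand-in for qa_error.Error (assumption)
    pass

sample_output = """\
Name        Mode  Owner  Pending
cluster     -     -      -
node/node1  excl  Job12  Job13,Job14
"""

blocking_locks = []
for lock_line in sample_output.splitlines()[1:]:
    components = lock_line.split()
    if len(components) != 4:
        raise Error("Error while parsing gnt-debug locks output, "
                    "line at fault is: %s" % lock_line)
    lock_name, _, _, pending_jobs = components
    if pending_jobs != '-':
        blocking_locks.append(lock_name)

print(blocking_locks)  # ['node/node1']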
Example #9
def TestNodeAddAll():
  """Adding all nodes to cluster."""
  master = qa_config.GetMasterNode()
  for node in qa_config.get("nodes"):
    if node != master:
      NodeAdd(node, readd=False)

  for node in qa_config.get("nodes"):
    def GetNonStartDaemons():
      cmd = utils.ShellQuoteArgs(["ps", "-Ao", "comm"])
      prcs = AssertCommand(cmd, node=node)[1]

      non_start_daemons = []

      def AddIfNotStarted(daemon):
        if daemon not in prcs:
          non_start_daemons.append(daemon)

      AddIfNotStarted('ganeti-noded')
      if constants.ENABLE_MOND:
        AddIfNotStarted('ganeti-mond')
      if node == master:
        AddIfNotStarted('ganeti-wconfd')
        AddIfNotStarted('ganeti-rapi')
        AddIfNotStarted('ganeti-luxid')
        AddIfNotStarted('ganeti-maintd')
      return non_start_daemons

    nsd = GetNonStartDaemons()
    for daemon in nsd:
      raise qa_error.Error(daemon + ' is not running at %s' % node.primary)
Example #10
def _RetrieveSecret(instance, pnode):
    """Retrieves the DRBD secret given an instance object and the primary node.

  @type instance: L{qa_config._QaInstance}
  @type pnode: L{qa_config._QaNode}

  @rtype: string

  """
    instance_info = GetInstanceInfo(instance.name)

    # We are interested in only the first disk on the primary
    drbd_minor = instance_info["drbd-minors"][pnode.primary][0]

    # This form should work for all DRBD versions
    drbd_command = ("drbdsetup show %d; drbdsetup %d show || true" %
                    (drbd_minor, drbd_minor))
    instance_drbd_info = \
      qa_utils.GetCommandOutput(pnode.primary, drbd_command)

    match_obj = _DRBD_SECRET_RE.search(instance_drbd_info)
    if match_obj is None:
        raise qa_error.Error(
            "Could not retrieve DRBD secret for instance %s from"
            " node %s." % (instance.name, pnode.primary))

    return match_obj.groups(0)[0]
Example #11
def _StartDelayFunction(locks, timeout):
    """ Starts the gnt-debug delay option with the given locks and timeout.

  """
    # The interruptible switch must be used
    cmd = ["gnt-debug", "delay", "-i", "--submit", "--no-master"]

    for node in locks.get(locking.LEVEL_NODE, []):
        cmd.append("-n%s" % node)
    cmd.append(str(timeout))

    job_id = ExecuteJobProducingCommand(cmd)

    # Waits until a non-empty result is returned from the function
    log_entry = retry.SimpleRetry(lambda x: x,
                                  _RetrieveTerminationInfo,
                                  2.0,
                                  10.0,
                                  args=[job_id])

    if not log_entry:
        raise qa_error.Error(
            "Failure when trying to retrieve delay termination "
            "information")

    _, _, (socket_path, ) = log_entry["Content"]

    return socket_path
Example #12
    def GetDiskOptions(self):
        """Return options for the disks of the instances.

    Get 'disks' parameter from the configuration data. If 'disks' is missing,
    try to create it from the legacy 'disk' and 'disk-growth' parameters.

    """
        try:
            return self._data["disks"]
        except KeyError:
            pass

        # Legacy interface
        sizes = self._data.get("disk")
        growths = self._data.get("disk-growth")
        if sizes or growths:
            if (sizes is None or growths is None
                    or len(sizes) != len(growths)):
                raise qa_error.Error(
                    "Config options 'disk' and 'disk-growth' must"
                    " exist and have the same number of items")
            disks = []
            for (size, growth) in zip(sizes, growths):
                disks.append({"size": size, "growth": growth})
            return disks
        else:
            return None
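
The legacy branch above, applied to a small made-up configuration:

sizes = ["1G", "2G"]       # legacy "disk" option
growths = ["512M", "1G"]   # legacy "disk-growth" option

disks = [{"size": size, "growth": growth}
         for (size, growth) in zip(sizes, growths)]
# -> [{'size': '1G', 'growth': '512M'}, {'size': '2G', 'growth': '1G'}]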
Example #13
    def Load(cls, filename):
        """Loads a configuration file and produces a configuration object.

    @type filename: string
    @param filename: Path to configuration file
    @rtype: L{_QaConfig}

    """
        data = serializer.LoadJson(utils.ReadFile(filename))

        # Patch the document using JSON Patch (RFC6902) in file _PATCH_JSON, if
        # available
        try:
            patch = serializer.LoadJson(utils.ReadFile(_PATCH_JSON))
            if patch:
                mod = __import__("jsonpatch", fromlist=[])
                data = mod.apply_patch(data, patch)
        except IOError:
            pass
        except ImportError:
            raise qa_error.Error(
                "If you want to use the QA JSON patching feature,"
                " you need to install Python modules"
                " 'jsonpatch' and 'jsonpointer'.")

        result = cls(dict(map(_ConvertResources, data.items())))  # pylint: disable=E1103
        result.Validate()

        return result
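
A sketch of the patching step in isolation, assuming the jsonpatch package is installed; the document and the patch operations are made up.

import json
import jsonpatch  # needs the 'jsonpatch' and 'jsonpointer' packages

data = {"name": "qa-cluster.example.com", "rapi-port": 5080}
patch = json.loads('[{"op": "replace", "path": "/rapi-port", "value": 15080}]')

patched = jsonpatch.apply_patch(data, patch)
print(patched["rapi-port"])  # 15080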
Example #14
    def Load(cls, filename):
        """Loads a configuration file and produces a configuration object.

    @type filename: string
    @param filename: Path to configuration file
    @rtype: L{_QaConfig}

    """
        data = serializer.LoadJson(utils.ReadFile(filename))

        # Patch the document using JSON Patch (RFC6902) in file _PATCH_JSON, if
        # available
        try:
            patches = _QaConfig.LoadPatches()
            # Try to use the module only if there is a non-empty patch present
            if any(patches.values()):
                mod = __import__("jsonpatch", fromlist=[])
                _QaConfig.ApplyPatches(data, mod, patches)
        except IOError:
            pass
        except ImportError:
            raise qa_error.Error(
                "For the QA JSON patching feature to work, you "
                "need to install Python modules 'jsonpatch' and "
                "'jsonpointer'.")

        result = cls(dict(map(_ConvertResources, data.items())))  # pylint: disable=E1103
        result.Validate()

        return result
Example #15
def GetGenericAddParameters(inst, disk_template, force_mac=None):
    params = ["-B"]
    params.append("%s=%s,%s=%s" %
                  (constants.BE_MINMEM, qa_config.get(constants.BE_MINMEM),
                   constants.BE_MAXMEM, qa_config.get(constants.BE_MAXMEM)))

    if disk_template != constants.DT_DISKLESS:
        for idx, disk in enumerate(qa_config.GetDiskOptions()):
            size = disk.get("size")
            name = disk.get("name")
            diskparams = "%s:size=%s" % (idx, size)
            if name:
                diskparams += ",name=%s" % name
            if qa_config.AreSpindlesSupported():
                spindles = disk.get("spindles")
                if spindles is None:
                    raise qa_error.Error(
                        "'spindles' is a required parameter for disks"
                        " when you enable exclusive storage tests")
                diskparams += ",spindles=%s" % spindles
            params.extend(["--disk", diskparams])

    # Set static MAC address if configured
    if force_mac:
        nic0_mac = force_mac
    else:
        nic0_mac = inst.GetNicMacAddr(0, None)

    if nic0_mac:
        params.extend(["--net", "0:mac=%s" % nic0_mac])

    return params
Example #16
def TestInstanceConsecutiveFailures(instance):
    """Test five consecutive instance failures.

  """
    inst_name = qa_utils.ResolveInstanceName(instance.name)
    inst_was_running = bool(_InstanceRunning(inst_name))

    _ResetWatcherDaemon()

    for should_start in ([True] * 5) + [False]:
        _ShutdownInstance(inst_name)
        RunWatcherDaemon()
        time.sleep(5)

        if bool(_InstanceRunning(inst_name)) != should_start:
            if should_start:
                msg = "Instance not started when it should"
            else:
                msg = "Instance started when it shouldn't"
            raise qa_error.Error(msg)

    AssertCommand(["gnt-instance", "info", inst_name])

    if inst_was_running:
        _StartInstance(inst_name)
Example #17
def TestClusterBurnin():
    """Burnin"""
    master = qa_config.GetMasterNode()

    options = qa_config.get("options", {})
    disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
    parallel = options.get("burnin-in-parallel", False)
    check_inst = options.get("burnin-check-instances", False)
    do_rename = options.get("burnin-rename", "")
    do_reboot = options.get("burnin-reboot", True)
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

    # Get as many instances as we need
    instances = []
    try:
        try:
            num = qa_config.get("options", {}).get("burnin-instances", 1)
            for _ in range(0, num):
                instances.append(qa_config.AcquireInstance())
        except qa_error.OutOfInstancesError:
            print "Not enough instances, continuing anyway."

        if len(instances) < 1:
            raise qa_error.Error("Burnin needs at least one instance")

        script = qa_utils.UploadFile(master.primary, "../tools/burnin")
        try:
            disks = qa_config.GetDiskOptions()
            # Run burnin
            cmd = [
                "env",
                "PYTHONPATH=%s" % _constants.VERSIONEDSHAREDIR, script,
                "--os=%s" % qa_config.get("os"),
                "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
                "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
                "--disk-size=%s" % ",".join([d.get("size") for d in disks]),
                "--disk-growth=%s" % ",".join([d.get("growth")
                                               for d in disks]),
                "--disk-template=%s" % disk_template
            ]
            if parallel:
                cmd.append("--parallel")
                cmd.append("--early-release")
            if check_inst:
                cmd.append("--http-check")
            if do_rename:
                cmd.append("--rename=%s" % do_rename)
            if not do_reboot:
                cmd.append("--no-reboot")
            else:
                cmd.append("--reboot-types=%s" % ",".join(reboot_types))
            cmd += [inst.name for inst in instances]
            AssertCommand(cmd)
        finally:
            AssertCommand(["rm", "-f", script])

    finally:
        for inst in instances:
            inst.Release()
Example #18
def fn():
    tags = _GetMaintTags(node)
    if len(tags) == 0:
        raise retry.RetryAgain()
    if len(tags) > 1:
        raise qa_error.Error("Only one tag should be added")
    else:
        return tags[0]
Example #19
def TestEmptyCluster():
    """Testing remote API on an empty cluster.

  """
    master = qa_config.GetMasterNode()
    master_full = qa_utils.ResolveNodeName(master)

    def _VerifyInfo(data):
        AssertIn("name", data)
        AssertIn("master", data)
        AssertEqual(data["master"], master_full)

    def _VerifyNodes(data):
        master_entry = {
            "id": master_full,
            "uri": "/2/nodes/%s" % master_full,
        }
        AssertIn(master_entry, data)

    def _VerifyNodesBulk(data):
        for node in data:
            for entry in NODE_FIELDS:
                AssertIn(entry, node)

    def _VerifyGroups(data):
        default_group = {
            "name": constants.INITIAL_NODE_GROUP_NAME,
            "uri": "/2/groups/" + constants.INITIAL_NODE_GROUP_NAME,
        }
        AssertIn(default_group, data)

    def _VerifyGroupsBulk(data):
        for group in data:
            for field in GROUP_FIELDS:
                AssertIn(field, group)

    _DoTests([
        ("/", None, "GET", None),
        ("/2/info", _VerifyInfo, "GET", None),
        ("/2/tags", None, "GET", None),
        ("/2/nodes", _VerifyNodes, "GET", None),
        ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
        ("/2/groups", _VerifyGroups, "GET", None),
        ("/2/groups?bulk=1", _VerifyGroupsBulk, "GET", None),
        ("/2/instances", [], "GET", None),
        ("/2/instances?bulk=1", [], "GET", None),
        ("/2/os", None, "GET", None),
    ])

    # Test HTTP Not Found
    for method in ["GET", "PUT", "POST", "DELETE"]:
        try:
            _DoTests([("/99/resource/not/here/99", None, method, None)])
        except rapi.client.GanetiApiError as err:
            AssertEqual(err.code, 404)
        else:
            raise qa_error.Error(
                "Non-existent resource didn't return HTTP 404")
Example #20
def _RaiseWithInfo(msg, error_desc):
    """Raises a QA error with the given content, and adds a message if present.

  """
    if msg:
        output = "%s: %s" % (msg, error_desc)
    else:
        output = error_desc
    raise qa_error.Error(output)
Example #21
def TestRapiStoppedInstanceConsole(instance):
    """Test getting stopped instance's console information via RAPI"""
    try:
        _rapi_client.GetInstanceConsole(instance.name)
    except rapi.client.GanetiApiError as err:
        AssertEqual(err.code, 503)
    else:
        raise qa_error.Error("Getting console for stopped instance didn't"
                             " return HTTP 503")
Example #22
def AssertCommand(cmd, fail=False, node=None, log_cmd=True, forward_agent=True,
                  max_seconds=None):
  """Checks that a remote command succeeds.

  @param cmd: either a string (the command to execute) or a list (to
      be converted using L{utils.ShellQuoteArgs} into a string)
  @type fail: boolean or None
  @param fail: if the command is expected to fail instead of succeeding,
               or None if we don't care
  @param node: if passed, it should be the node on which the command
      should be executed, instead of the master node (can be either a
      dict or a string)
  @param log_cmd: if False, the command won't be logged (simply passed to
      StartSSH)
  @type forward_agent: boolean
  @param forward_agent: whether to forward the agent when starting the SSH
                        session or not, sometimes useful for crypto-related
                        operations which can use a key they should not
  @type max_seconds: double
  @param max_seconds: fail if the command takes more than C{max_seconds}
      seconds
  @return: the return code, stdout and stderr of the command
  @raise qa_error.Error: if the command fails when it shouldn't or vice versa

  """
  if node is None:
    node = qa_config.GetMasterNode()

  nodename = _GetName(node, operator.attrgetter("primary"))

  if isinstance(cmd, basestring):
    cmdstr = cmd
  else:
    cmdstr = utils.ShellQuoteArgs(cmd)

  start = datetime.datetime.now()
  popen = StartSSH(nodename, cmdstr, log_cmd=log_cmd,
                   forward_agent=forward_agent)
  # Run the command
  stdout, stderr = popen.communicate()
  rcode = popen.returncode
  duration_seconds = TimedeltaToTotalSeconds(datetime.datetime.now() - start)

  try:
    if fail is not None:
      _AssertRetCode(rcode, fail, cmdstr, nodename)
  finally:
    if log_cmd:
      _PrintCommandOutput(stdout, stderr)

  if max_seconds is not None:
    if duration_seconds > max_seconds:
      raise qa_error.Error(
        "Cmd '%s' took %f seconds, maximum of %f was exceeded" %
        (cmdstr, duration_seconds, max_seconds))

  return rcode, stdout, stderr
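
Illustrative call patterns for AssertCommand, drawn from the parameters documented above and from the other examples on this page; this is not a runnable script on its own, and job_id and snode stand for values obtained elsewhere.

# Run on the master node and expect success (the default).
AssertCommand(["gnt-cluster", "verify"])

# Expect the command to fail.
AssertCommand(["gnt-job", "watch", job_id], fail=True)

# Run a shell string on a specific node without logging the command.
AssertCommand("gnt-cluster verify-disks > /dev/null", node=snode, log_cmd=False)

# Fail if the command takes longer than 30 seconds; inspect the output.
rcode, stdout, stderr = AssertCommand(["gnt-cluster", "info"], max_seconds=30)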
Example #23
def TestRapiInstanceMultiAlloc(node):
    """Test adding two new instances via the RAPI instance-multi-alloc method"""
    if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
        return

    JOBS_KEY = "jobs"

    instance_one = qa_config.AcquireInstance()
    instance_two = qa_config.AcquireInstance()
    instance_list = [instance_one, instance_two]
    try:
        rapi_dicts = [
            _GenInstanceAllocationDict(node, i) for i in instance_list
        ]

        job_id = _rapi_client.InstancesMultiAlloc(rapi_dicts)

        results, = _WaitForRapiJob(job_id)

        if JOBS_KEY not in results:
            raise qa_error.Error("RAPI instance-multi-alloc did not deliver "
                                 "information about created jobs")

        if len(results[JOBS_KEY]) != len(instance_list):
            raise qa_error.Error(
                "RAPI instance-multi-alloc failed to return the "
                "desired number of jobs!")

        for success, job in results[JOBS_KEY]:
            if success:
                _WaitForRapiJob(job)
            else:
                raise qa_error.Error("Failed to create instance in "
                                     "instance-multi-alloc call")
    except:
        # Note that although released, it may be that some of the instance creations
        # have in fact succeeded. Handling this in a better way may be possible, but
        # is not necessary as the QA has already failed at this point.
        for instance in instance_list:
            instance.Release()
        raise

    return (instance_one, instance_two)
Example #24
def _StartInstance(name):
    """Starts instance and waits for completion.

  @param name: full name of the instance

  """
    AssertCommand(["gnt-instance", "start", name])

    if not bool(_InstanceRunning(name)):
        raise qa_error.Error("instance start failed")
Example #25
def _ShutdownInstance(name):
    """Shuts down instance without recording state and waits for completion.

  @param name: full name of the instance

  """
    AssertCommand(["gnt-instance", "shutdown", "--no-remember", name])

    if _InstanceRunning(name):
        raise qa_error.Error("instance shutdown failed")
Example #26
    def WaitForCompletion(self):
        """Wait for the completion of all registered jobs.

    """
        while self._HasPendingJobs():
            time.sleep(2)

        with self._lock:
            if self._jobs:
                raise qa_error.Error(
                    "Jobs %s didn't finish in success state!" %
                    self._GetJobIds())
Example #27
  def Validate(self):
    """Validates loaded configuration data.

    """
    if not self.get("name"):
      raise qa_error.Error("Cluster name is required")

    if not self.get("nodes"):
      raise qa_error.Error("Need at least one node")

    if not self.get("instances"):
      raise qa_error.Error("Need at least one instance")

    disks = self.GetDiskOptions()
    if disks is None:
      raise qa_error.Error("Config option 'disks' must exist")
    else:
      for d in disks:
        if d.get("size") is None or d.get("growth") is None:
          raise qa_error.Error("Config options `size` and `growth` must exist"
                               " for all `disks` items")
    check = self.GetInstanceCheckScript()
    if check:
      try:
        os.stat(check)
      except EnvironmentError as err:
        raise qa_error.Error("Can't find instance check script '%s': %s" %
                             (check, err))
Example #28
def ReloadCertificates(ensure_presence=True):
    """Reloads the client RAPI certificate with the one present on the node.

  If the QA is set up to use a specific certificate using the
  "rapi-files-location" parameter, it will be put in place prior to retrieving
  it.

  """
    if ensure_presence:
        _EnsureRapiFilesPresence()

    if _rapi_username is None or _rapi_password is None:
        raise qa_error.Error("RAPI username and password have to be set before"
                             " attempting to reload a certificate.")

    # pylint: disable=W0603
    # due to global usage
    global _rapi_ca
    global _rapi_client

    master = qa_config.GetMasterNode()

    # Load RAPI certificate from master node
    cmd = [
        "openssl", "x509", "-in",
        qa_utils.MakeNodePath(master, pathutils.RAPI_CERT_FILE)
    ]

    # Write to temporary file
    _rapi_ca = tempfile.NamedTemporaryFile()
    _rapi_ca.write(
        qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)))
    _rapi_ca.flush()

    port = qa_config.get("rapi-port", default=constants.DEFAULT_RAPI_PORT)
    cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name, proxy="")

    if qa_config.UseVirtualCluster():
        # TODO: Implement full support for RAPI on virtual clusters
        print qa_logging.FormatWarning(
            "RAPI tests are not yet supported on"
            " virtual clusters and will be disabled")

        assert _rapi_client is None
    else:
        _rapi_client = rapi.client.GanetiRapiClient(master.primary,
                                                    port=port,
                                                    username=_rapi_username,
                                                    password=_rapi_password,
                                                    curl_config_fn=cfg_curl)

        print "RAPI protocol version: %s" % _rapi_client.GetVersion()
Example #29
def _ReadRapiSecret(password_file_path):
    """Reads a RAPI secret stored locally.

  @type password_file_path: string
  @return: Login secret for the user

  """
    try:
        with open(password_file_path, 'r') as pw_file:
            return pw_file.readline().strip()
    except IOError:
        raise qa_error.Error("Could not open the RAPI password file located at"
                             " %s" % password_file_path)
Example #30
def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes):
    """gnt-cluster verify-disks with broken DRBD"""
    qa_daemon.TestPauseWatcher()

    try:
        info = qa_instance.GetInstanceInfo(instance.name)
        snode = inst_nodes[1]
        for idx, minor in enumerate(info["drbd-minors"][snode.primary]):
            if idx % 2 == 0:
                break_drbd_cmd = \
                  "(drbdsetup %d down >/dev/null 2>&1;" \
                  " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \
                  (minor, minor)
            else:
                break_drbd_cmd = \
                  "(drbdsetup %d detach >/dev/null 2>&1;" \
                  " drbdsetup detach %d >/dev/null 2>&1) || /bin/true" % \
                  (minor, minor)
            AssertCommand(break_drbd_cmd, node=snode)

        verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
                                         "gnt-cluster verify-disks")
        activation_msg = "Activating disks for instance '%s'" % instance.name
        if activation_msg not in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks did not activate broken"
                " DRBD disks:\n%s" % verify_output)

        verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
                                         "gnt-cluster verify-disks")
        if activation_msg in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks wants to activate broken"
                " DRBD disks on second attempt:\n%s" % verify_output)

        AssertCommand(_CLUSTER_VERIFY)
    finally:
        qa_daemon.TestResumeWatcher()