Example #1
def TestLiveRepair():
    """Test node evacuate failover upon diagnosis.

  """
    _SetUp('live-repair')
    n = random.randint(10000, 99999)
    node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    UploadData(
        node.primary, 'echo \'' + serializer.DumpJson({
            "status": "live-repair",
            "command": "repair",
            "details": str(n)
        }).strip() + '\'', 0755,
        '/etc/ganeti/node-diagnose-commands/live-repair')
    UploadData(
        node.primary, """#!/usr/bin/python
import sys
import json

n = json.loads(sys.stdin.read())['details']
with open('/tmp/' + n, 'w') as f:
  f.write(n)
print 'file written'
""", 0755, '/etc/ganeti/node-repair-commands/repair')
    _AssertRepairCommand()
    tag = _AssertRepairTagAddition(node)
    if str(n) != AssertCommand(["cat", "/tmp/" + str(n)], node=node)[1]:
        raise qa_error.Error('Repair command was unsuccessful')
    node.Release()
    _TearDown(node, tag, [
        '/etc/ganeti/node-diagnose-commands/live-repair',
        '/etc/ganeti/node-repair-commands/repair'
    ], False)
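
The _SetUp, _TearDown, _AssertRepairCommand and _AssertRepairTagAddition helpers are not shown. To illustrate the contract between the two uploaded scripts, here is a hypothetical local check (not part of the QA suite) that pipes the diagnose JSON into the repair script the way the maintenance daemon presumably does:

# Hypothetical sanity check; assumes the repair script from above is
# installed on this node and receives the diagnose JSON on stdin.
import json
import subprocess

details = "12345"
payload = json.dumps({"status": "live-repair",
                      "command": "repair",
                      "details": details})
proc = subprocess.Popen(["/etc/ganeti/node-repair-commands/repair"],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
(out, _) = proc.communicate(payload)
assert out.strip() == "file written"
assert open("/tmp/" + details).read() == details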
Example #2
def main():
  """Main program.

  """
  colors.check_for_colors()

  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  if len(args) == 1:
    (config_file, ) = args
  else:
    parser.error("Wrong number of arguments.")

  if not opts.yes_do_it:
    print ("Executing this script irreversibly destroys any Ganeti\n"
           "configuration on all nodes involved. If you really want\n"
           "to start testing, supply the --yes-do-it option.")
    sys.exit(1)

  qa_config.Load(config_file)

  primary = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(primary)
  print ("SSH command for primary node: %s" %
         utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
  print ("SSH command for other nodes: %s" %
         utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
Example #3
def TestClusterMasterFailoverWithDrainedQueue():
    """gnt-cluster master-failover with drained queue"""
    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    # Ensure queue is not drained
    for node in [master, failovermaster]:
        _AssertDrainFile(node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", _NodeQueueDrainFile(failovermaster)],
                  node=failovermaster)

    cmd = ["gnt-cluster", "master-failover"]
    try:
        _AssertDrainFile(failovermaster)
        AssertCommand(cmd, node=failovermaster)
        _AssertDrainFile(master, fail=True)
        _AssertDrainFile(failovermaster, fail=True)

        # Back to original master node
        AssertCommand(cmd, node=master)
    finally:
        failovermaster.Release()

    # Ensure queue is not drained
    for node in [master, failovermaster]:
        _AssertDrainFile(node, fail=True)
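
_AssertDrainFile and _NodeQueueDrainFile are used above but not shown. A plausible sketch of the two helpers, assuming the drain-file path constant is pathutils.JOB_QUEUE_DRAIN_FILE:

def _NodeQueueDrainFile(node):
  """Returns the path to the queue drain file for a node."""
  # Constant name assumed; the real helper lives alongside this test.
  return qa_utils.MakeNodePath(node, pathutils.JOB_QUEUE_DRAIN_FILE)


def _AssertDrainFile(node, **kwargs):
  """Checks for the queue drain file; pass fail=True to assert its absence."""
  AssertCommand(["test", "-f", _NodeQueueDrainFile(node)], node=node, **kwargs)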
Example #4
def AssertClusterVerify(fail=False,
                        errors=None,
                        warnings=None,
                        no_warnings=None):
    """Run cluster-verify and check the result, ignoring warnings by default.

    @type fail: bool
    @param fail: if cluster-verify is expected to fail instead of succeeding.
    @type errors: list of tuples
    @param errors: List of CV_XXX errors that are expected; if specified, all
        the errors listed must appear in cluster-verify output. A non-empty
        value implies C{fail=True}.
    @type warnings: list of tuples
    @param warnings: List of CV_XXX warnings that are expected to be raised; if
        specified, all the warnings listed must appear in cluster-verify
        output.
    @type no_warnings: list of tuples
    @param no_warnings: List of CV_XXX warnings that we expect NOT to be raised.
    """
    cvcmd = "gnt-cluster verify"
    mnode = qa_config.GetMasterNode()
    if errors or warnings or no_warnings:
        cvout = GetCommandOutput(mnode.primary,
                                 cvcmd + " --error-codes",
                                 fail=(fail or errors))
        print cvout
        (act_errs, act_warns) = _GetCVErrorCodes(cvout)
        if errors:
            _CheckVerifyErrors(act_errs, errors, "error")
        if warnings:
            _CheckVerifyErrors(act_warns, warnings, "warning")
        if no_warnings:
            _CheckVerifyNoWarnings(act_warns, no_warnings)

    else:
        AssertCommand(cvcmd, fail=fail, node=mnode)
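
A simplified sketch of the _GetCVErrorCodes helper used above, assuming the --error-codes line format shown in the comment (the real parser keeps per-code detail that _CheckVerifyErrors consumes):

import re

# Assumed output format of "gnt-cluster verify --error-codes":
#   - ERROR:ECLUSTERCFG:cluster::description
_CVERROR_RE = re.compile(r"\s*- (ERROR|WARNING):([A-Z0-9_-]+):")


def _GetCVErrorCodes(cvout):
  """Extracts (error codes, warning codes) from cluster-verify output."""
  errs = set()
  warns = set()
  for line in cvout.splitlines():
    m = _CVERROR_RE.match(line)
    if m:
      (etype, ecode) = m.groups()
      if etype == "ERROR":
        errs.add(ecode)
      else:
        warns.add(ecode)
  return (errs, warns)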
Example #5
def _CreateRapiUser(rapi_user):
    """RAPI credentials creation, with the secret auto-generated.

  """
    rapi_secret = utils.GenerateSecret()

    master = qa_config.GetMasterNode()

    rapi_users_path = qa_utils.MakeNodePath(master, pathutils.RAPI_USERS_FILE)
    rapi_dir = os.path.dirname(rapi_users_path)

    fh = tempfile.NamedTemporaryFile()
    try:
        fh.write("%s %s write\n" % (rapi_user, rapi_secret))
        fh.flush()

        tmpru = qa_utils.UploadFile(master.primary, fh.name)
        try:
            AssertCommand(["mkdir", "-p", rapi_dir])
            AssertCommand(["mv", tmpru, rapi_users_path])
        finally:
            AssertCommand(["rm", "-f", tmpru])
    finally:
        fh.close()

    # The certificates have to be reloaded now
    AssertCommand(["service", "ganeti", "restart"])

    return rapi_secret
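
Hypothetical follow-up showing how the returned secret could be used; the client constructor parameters mirror the ones in Example #20, and the user name here is illustrative:

master = qa_config.GetMasterNode()
secret = _CreateRapiUser("qa-rapi-user")
# Certificate handling omitted for brevity; see Example #20 for CA setup.
client = rapi.client.GanetiRapiClient(master.primary,
                                      port=constants.DEFAULT_RAPI_PORT,
                                      username="qa-rapi-user",
                                      password=secret)
print "RAPI version: %s" % client.GetVersion()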
Example #6
def TestNodeAddAll():
  """Adding all nodes to cluster."""
  master = qa_config.GetMasterNode()
  for node in qa_config.get("nodes"):
    if node != master:
      NodeAdd(node, readd=False)

  for node in qa_config.get("nodes"):
    def GetNonStartDaemons():
      cmd = utils.ShellQuoteArgs(["ps", "-Ao", "comm"])
      prcs = AssertCommand(cmd, node=node)[1]

      non_start_daemons = []

      def AddIfNotStarted(daemon):
        if daemon not in prcs:
          non_start_daemons.append(daemon)

      AddIfNotStarted('ganeti-noded')
      if constants.ENABLE_MOND:
        AddIfNotStarted('ganeti-mond')
      if node == master:
        AddIfNotStarted('ganeti-wconfd')
        AddIfNotStarted('ganeti-rapi')
        AddIfNotStarted('ganeti-luxid')
        AddIfNotStarted('ganeti-maintd')
      return non_start_daemons

    nsd = GetNonStartDaemons()
    for daemon in nsd:
      raise qa_error.Error(daemon + ' is not running at %s' % node.primary)
Example #7
def TestInterClusterInstanceMove(src_instance,
                                 dest_instance,
                                 inodes,
                                 tnode,
                                 perform_checks=True):
    """Test tools/move-instance"""
    master = qa_config.GetMasterNode()

    rapi_pw_file = tempfile.NamedTemporaryFile()
    rapi_pw_file.write(_rapi_password)
    rapi_pw_file.flush()

    # Needed only if checks are to be performed
    if perform_checks:
        dest_instance.SetDiskTemplate(src_instance.disk_template)

    # TODO: Run some instance tests before moving back

    if len(inodes) > 1:
        # No disk template currently requires more than 1 secondary node. If this
        # changes, either this test must be skipped or the script must be updated.
        assert len(inodes) == 2
        snode = inodes[1]
    else:
        # instance is not redundant, but we still need to pass a node
        # (which will be ignored)
        snode = tnode
    pnode = inodes[0]
    # note: pnode:snode are the *current* nodes, so we move it first to
    # tnode:pnode, then back to pnode:snode
    for current_src_inst, current_dest_inst, target_pnode, target_snode in \
      [(src_instance.name, dest_instance.name, tnode.primary, pnode.primary),
       (dest_instance.name, src_instance.name, pnode.primary, snode.primary)]:
        cmd = [
            "../tools/move-instance",
            "--verbose",
            "--src-ca-file=%s" % _rapi_ca.name,
            "--src-username=%s" % _rapi_username,
            "--src-password-file=%s" % rapi_pw_file.name,
            "--dest-instance-name=%s" % current_dest_inst,
            "--dest-primary-node=%s" % target_pnode,
            "--dest-secondary-node=%s" % target_snode,
            "--net=0:mac=%s" % constants.VALUE_GENERATE,
            master.primary,
            master.primary,
            current_src_inst,
        ]

        # Some uses of this test might require that RAPI-only commands are used,
        # and the checks are command-line based.

        if perform_checks:
            qa_utils.RunInstanceCheck(current_dest_inst, False)

        AssertEqual(StartLocalCommand(cmd).wait(), 0)

        if perform_checks:
            qa_utils.RunInstanceCheck(current_src_inst, False)
            qa_utils.RunInstanceCheck(current_dest_inst, True)
Example #8
def TestClusterBurnin():
    """Burnin"""
    master = qa_config.GetMasterNode()

    options = qa_config.get("options", {})
    disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
    parallel = options.get("burnin-in-parallel", False)
    check_inst = options.get("burnin-check-instances", False)
    do_rename = options.get("burnin-rename", "")
    do_reboot = options.get("burnin-reboot", True)
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

    # Get as many instances as we need
    instances = []
    try:
        try:
            num = qa_config.get("options", {}).get("burnin-instances", 1)
            for _ in range(0, num):
                instances.append(qa_config.AcquireInstance())
        except qa_error.OutOfInstancesError:
            print "Not enough instances, continuing anyway."

        if len(instances) < 1:
            raise qa_error.Error("Burnin needs at least one instance")

        script = qa_utils.UploadFile(master.primary, "../tools/burnin")
        try:
            disks = qa_config.GetDiskOptions()
            # Run burnin
            cmd = [
                "env",
                "PYTHONPATH=%s" % _constants.VERSIONEDSHAREDIR, script,
                "--os=%s" % qa_config.get("os"),
                "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
                "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
                "--disk-size=%s" % ",".join([d.get("size") for d in disks]),
                "--disk-growth=%s" % ",".join([d.get("growth")
                                               for d in disks]),
                "--disk-template=%s" % disk_template
            ]
            if parallel:
                cmd.append("--parallel")
                cmd.append("--early-release")
            if check_inst:
                cmd.append("--http-check")
            if do_rename:
                cmd.append("--rename=%s" % do_rename)
            if not do_reboot:
                cmd.append("--no-reboot")
            else:
                cmd.append("--reboot-types=%s" % ",".join(reboot_types))
            cmd += [inst.name for inst in instances]
            AssertCommand(cmd)
        finally:
            AssertCommand(["rm", "-f", script])

    finally:
        for inst in instances:
            inst.Release()
Example #9
def TestEmptyCluster():
    """Testing remote API on an empty cluster.

  """
    master = qa_config.GetMasterNode()
    master_full = qa_utils.ResolveNodeName(master)

    def _VerifyInfo(data):
        AssertIn("name", data)
        AssertIn("master", data)
        AssertEqual(data["master"], master_full)

    def _VerifyNodes(data):
        master_entry = {
            "id": master_full,
            "uri": "/2/nodes/%s" % master_full,
        }
        AssertIn(master_entry, data)

    def _VerifyNodesBulk(data):
        for node in data:
            for entry in NODE_FIELDS:
                AssertIn(entry, node)

    def _VerifyGroups(data):
        default_group = {
            "name": constants.INITIAL_NODE_GROUP_NAME,
            "uri": "/2/groups/" + constants.INITIAL_NODE_GROUP_NAME,
        }
        AssertIn(default_group, data)

    def _VerifyGroupsBulk(data):
        for group in data:
            for field in GROUP_FIELDS:
                AssertIn(field, group)

    _DoTests([
        ("/", None, "GET", None),
        ("/2/info", _VerifyInfo, "GET", None),
        ("/2/tags", None, "GET", None),
        ("/2/nodes", _VerifyNodes, "GET", None),
        ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
        ("/2/groups", _VerifyGroups, "GET", None),
        ("/2/groups?bulk=1", _VerifyGroupsBulk, "GET", None),
        ("/2/instances", [], "GET", None),
        ("/2/instances?bulk=1", [], "GET", None),
        ("/2/os", None, "GET", None),
    ])

    # Test HTTP Not Found
    for method in ["GET", "PUT", "POST", "DELETE"]:
        try:
            _DoTests([("/99/resource/not/here/99", None, method, None)])
        except rapi.client.GanetiApiError, err:
            AssertEqual(err.code, 404)
        else:
            raise qa_error.Error(
                "Non-existent resource didn't return HTTP 404")
Example #10
def RunCustomSshPortTests():
    """Test accessing nodes with custom SSH ports.

    This requires removing nodes, adding them to a new group, and then undoing
    the change.
    """
    if not qa_config.TestEnabled("group-custom-ssh-port"):
        return

    std_port = netutils.GetDaemonPort(constants.SSH)
    port = 211
    master = qa_config.GetMasterNode()
    with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
        # Checks if the node(s) could be contacted through IPv6.
        # If yes, better skip the whole test.

        for node in nodes:
            if qa_utils.UsesIPv6Connection(node.primary, std_port):
                print(
                    "Node %s is likely to be reached using IPv6,"
                    " skipping the test" % (node.primary, ))
                return

        for node in nodes:
            qa_node.NodeRemove(node)
        with qa_iptables.RulesContext() as r:
            with qa_group.NewGroupCtx() as group:
                qa_group.ModifyGroupSshPort(r, group, nodes, port)

                for node in nodes:
                    qa_node.NodeAdd(node, group=group)

                # Make sure that the cluster doesn't have any pre-existing problem
                qa_cluster.AssertClusterVerify()

                # Create and allocate instances
                instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
                try:
                    instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
                    try:
                        # cluster-verify checks that disks are allocated correctly
                        qa_cluster.AssertClusterVerify()

                        # Remove instances
                        qa_instance.TestInstanceRemove(instance2)
                        qa_instance.TestInstanceRemove(instance1)
                    finally:
                        instance2.Release()
                finally:
                    instance1.Release()

                for node in nodes:
                    qa_node.NodeRemove(node)

        for node in nodes:
            qa_node.NodeAdd(node)

        qa_cluster.AssertClusterVerify()
Example #11
def AssertCommand(cmd, fail=False, node=None, log_cmd=True, forward_agent=True,
                  max_seconds=None):
  """Checks that a remote command succeeds.

  @param cmd: either a string (the command to execute) or a list (to
      be converted using L{utils.ShellQuoteArgs} into a string)
  @type fail: boolean or None
  @param fail: if the command is expected to fail instead of succeeding,
               or None if we don't care
  @param node: if passed, it should be the node on which the command
      should be executed, instead of the master node (can be either a
      dict or a string)
  @param log_cmd: if False, the command won't be logged (simply passed to
      StartSSH)
  @type forward_agent: boolean
  @param forward_agent: whether to forward the agent when starting the SSH
                        session or not, sometimes useful for crypto-related
                        operations which can use a key they should not
  @type max_seconds: double
  @param max_seconds: fail if the command takes more than C{max_seconds}
      seconds
  @return: the return code, stdout and stderr of the command
  @raise qa_error.Error: if the command fails when it shouldn't or vice versa

  """
  if node is None:
    node = qa_config.GetMasterNode()

  nodename = _GetName(node, operator.attrgetter("primary"))

  if isinstance(cmd, basestring):
    cmdstr = cmd
  else:
    cmdstr = utils.ShellQuoteArgs(cmd)

  start = datetime.datetime.now()
  popen = StartSSH(nodename, cmdstr, log_cmd=log_cmd,
                   forward_agent=forward_agent)
  # Run the command
  stdout, stderr = popen.communicate()
  rcode = popen.returncode
  duration_seconds = TimedeltaToTotalSeconds(datetime.datetime.now() - start)

  try:
    if fail is not None:
      _AssertRetCode(rcode, fail, cmdstr, nodename)
  finally:
    if log_cmd:
      _PrintCommandOutput(stdout, stderr)

  if max_seconds is not None:
    if duration_seconds > max_seconds:
      raise qa_error.Error(
        "Cmd '%s' took %f seconds, maximum of %f was exceeded" %
        (cmdstr, duration_seconds, max_seconds))

  return rcode, stdout, stderr
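
_AssertRetCode is the check at the heart of this helper. A minimal sketch, assuming fail simply inverts the expectation on the exit code:

def _AssertRetCode(rcode, fail, cmdstr, nodename):
  """Raises qa_error.Error if the return code doesn't match expectations."""
  if fail and rcode == 0:
    raise qa_error.Error("Command '%s' on node %s was expected to fail, but"
                         " succeeded" % (cmdstr, nodename))
  elif not fail and rcode != 0:
    raise qa_error.Error("Command '%s' on node %s failed with exit code %s" %
                         (cmdstr, nodename, rcode))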
Example #12
def _ResetWatcherDaemon():
    """Removes the watcher daemon's state file.

  """
    path = \
      qa_utils.MakeNodePath(qa_config.GetMasterNode(),
                            pathutils.WATCHER_GROUP_STATE_FILE % "*-*-*-*")

    AssertCommand(["bash", "-c", "rm -vf %s" % path])
Example #13
def _AssertOobCall(verify_path, expected_args):
  """Assert the OOB call was performed with expetected args."""
  master = qa_config.GetMasterNode()

  verify_output_cmd = utils.ShellQuoteArgs(["cat", verify_path])
  output = qa_utils.GetCommandOutput(master.primary, verify_output_cmd,
                                     tty=False)

  AssertEqual(expected_args, output.strip())
Example #14
File: qa_node.py Project: sajalcody/ganeti
def TestNodeModify(node):
    """gnt-node modify"""

    default_pool_size = 10
    nodes = qa_config.GetAllNodes()
    test_pool_size = len(nodes) - 1

    # Reduce the number of master candidates, because otherwise all
    # subsequent 'gnt-cluster verify' commands fail due to not enough
    # master candidates.
    AssertCommand(
        ["gnt-cluster", "modify",
         "--candidate-pool-size=%s" % test_pool_size])

    # make sure enough master candidates will be available by disabling the
    # master candidate role first with --auto-promote
    AssertCommand([
        "gnt-node", "modify", "--master-candidate=no", "--auto-promote",
        node.primary
    ])

    # now it's safe to force-remove the master candidate role
    for flag in ["master-candidate", "drained", "offline"]:
        for value in ["yes", "no"]:
            AssertCommand([
                "gnt-node", "modify", "--force",
                "--%s=%s" % (flag, value), node.primary
            ])
            AssertCommand(["gnt-cluster", "verify"])

    AssertCommand(
        ["gnt-node", "modify", "--master-candidate=yes", node.primary])

    # Test setting secondary IP address
    AssertCommand([
        "gnt-node", "modify",
        "--secondary-ip=%s" % node.secondary, node.primary
    ])

    AssertRedirectedCommand(["gnt-cluster", "verify"])
    AssertCommand([
        "gnt-cluster", "modify",
        "--candidate-pool-size=%s" % default_pool_size
    ])

    # For test clusters with more nodes than the default pool size,
    # we now have too many master candidates. To readjust to the original
    # size, manually demote all nodes and rely on auto-promotion to adjust.
    if len(nodes) > default_pool_size:
        master = qa_config.GetMasterNode()
        for n in nodes:
            if n.primary != master.primary:
                AssertCommand([
                    "gnt-node", "modify", "--master-candidate=no",
                    "--auto-promote", n.primary
                ])
Example #15
def MarkNodeAddedAll():
  """Mark all nodes as added.

  This is useful if we don't create the cluster ourselves (in qa).

  """
  master = qa_config.GetMasterNode()
  for node in qa_config.get("nodes"):
    if node != master:
      node.MarkAdded()
Example #16
def _TestGroupModifyISpecs(groupname):
  # This test is built on the assumption that the default ipolicy holds for
  # the node group under test
  old_values = _GetGroupIPolicy(groupname)
  samevals = dict((p, 4) for p in constants.ISPECS_PARAMETERS)
  base_specs = {
    constants.ISPECS_MINMAX: [{
      constants.ISPECS_MIN: samevals,
      constants.ISPECS_MAX: samevals,
      }],
    }
  mod_values = _TestGroupSetISpecs(groupname, new_specs=base_specs,
                                   old_values=old_values)
  for par in constants.ISPECS_PARAMETERS:
    # First make sure that the test works with good values
    good_specs = {
      constants.ISPECS_MINMAX: [{
        constants.ISPECS_MIN: {par: 8},
        constants.ISPECS_MAX: {par: 8},
        }],
      }
    mod_values = _TestGroupSetISpecs(groupname, diff_specs=good_specs,
                                     old_values=mod_values)
    bad_specs = {
      constants.ISPECS_MINMAX: [{
        constants.ISPECS_MIN: {par: 8},
        constants.ISPECS_MAX: {par: 4},
        }],
      }
    _TestGroupSetISpecs(groupname, diff_specs=bad_specs, fail=True,
                        old_values=mod_values)
  AssertCommand(["gnt-group", "modify", "--ipolicy-bounds-specs", "default",
                 groupname])
  AssertEqual(_GetGroupIPolicy(groupname), old_values)

  # Get the ipolicy command (from the cluster config)
  mnode = qa_config.GetMasterNode()
  addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([
    "gnt-group", "show-ispecs-cmd", "--include-defaults", groupname,
    ]))
  modcmd = ["gnt-group", "modify"]
  opts = addcmd.split()
  assert opts[0:2] == ["gnt-group", "add"]
  for k in range(2, len(opts) - 1):
    if opts[k].startswith("--ipolicy-"):
      assert k + 2 <= len(opts)
      modcmd.extend(opts[k:k + 2])
  modcmd.append(groupname)
  # Apply the ipolicy to the group and verify the result
  AssertCommand(modcmd)
  new_addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([
    "gnt-group", "show-ispecs-cmd", groupname,
    ]))
  AssertEqual(addcmd, new_addcmd)
Example #17
File: qa_job.py Project: badp/ganeti
def _GetJobStatuses():
    """ Invokes gnt-job list and extracts an id to status dictionary.

  @rtype: dict of string to string
  @return: A dictionary mapping job ids to matching statuses

  """
    master = qa_config.GetMasterNode()
    list_output = GetCommandOutput(
        master.primary, "gnt-job list --no-headers --output=id,status")
    return dict(map(lambda s: s.split(), list_output.splitlines()))
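
A plausible companion helper built on _GetJobStatuses, matching how Example #19 below looks up a single job:

def _GetJobStatus(job_id):
  """Retrieves the status of a single job, or None if it is not listed."""
  status_dict = _GetJobStatuses()
  return status_dict.get(job_id, None)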
Example #18
def TestPauseWatcher():
    """Tests and pauses the watcher.

  """
    master = qa_config.GetMasterNode()

    AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])

    cmd = ["gnt-cluster", "watcher", "info"]
    output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
    AssertMatch(output, r"^.*\bis paused\b.*")
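
The matching resume helper would look roughly like this (a sketch; the "continue" subcommand and the "is not paused" wording are assumptions):

def TestResumeWatcher():
  """Tests and unpauses the watcher."""
  master = qa_config.GetMasterNode()

  AssertCommand(["gnt-cluster", "watcher", "continue"])

  cmd = ["gnt-cluster", "watcher", "info"]
  output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
  AssertMatch(output, r"^.*\bis not paused\b.*")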
Example #19
File: qa_job.py Project: kawamuray/ganeti
def TestJobCancellation():
    """gnt-job cancel"""
    # The delay used for the first command should be large enough for the next
    # command and the cancellation command to complete before the first job is
    # done. The second delay should be small enough that not too much time is
    # spent waiting in the case of a failed cancel and a running command.
    FIRST_COMMAND_DELAY = 10.0
    AssertCommand(["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)])

    SECOND_COMMAND_DELAY = 3.0
    master = qa_config.GetMasterNode()

    # Forcing tty usage does not work on buildbot, so force all output of this
    # command to be redirected to stdout
    job_id_output = GetCommandOutput(
        master.primary,
        "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY)

    possible_job_ids = re.findall("JobID: ([0-9]+)", job_id_output)
    if len(possible_job_ids) != 1:
        raise qa_error.Error(
            "Cannot parse gnt-debug delay output to find job id")

    job_id = possible_job_ids[0]
    AssertCommand(["gnt-job", "cancel", job_id])

    # Now wait until the second job finishes, and expect the watch to fail due to
    # job cancellation
    AssertCommand(["gnt-job", "watch", job_id], fail=True)

    # Then check for job cancellation
    job_status = _GetJobStatus(job_id)
    if job_status != constants.JOB_STATUS_CANCELED:
        # Try and see if the job is being cancelled, and wait until the status
        # changes or we hit a timeout
        if job_status == constants.JOB_STATUS_CANCELING:
            retry_fn = functools.partial(_RetryingFetchJobStatus,
                                         constants.JOB_STATUS_CANCELING,
                                         job_id)
            try:
                # The multiplier to use is arbitrary; setting it higher could
                # prevent flakiness
                WAIT_MULTIPLIER = 4.0
                job_status = retry.Retry(retry_fn, 2.0,
                                         WAIT_MULTIPLIER * FIRST_COMMAND_DELAY)
            except retry.RetryTimeout:
                # The job status remains the same
                pass

        if job_status != constants.JOB_STATUS_CANCELED:
            raise qa_error.Error("Job was not successfully cancelled, status "
                                 "found: %s" % job_status)
Example #20
def ReloadCertificates(ensure_presence=True):
    """Reloads the client RAPI certificate with the one present on the node.

    If the QA is set up to use a specific certificate using the
    "rapi-files-location" parameter, it will be put in place prior to
    retrieving it.

    """
    if ensure_presence:
        _EnsureRapiFilesPresence()

    if _rapi_username is None or _rapi_password is None:
        raise qa_error.Error("RAPI username and password have to be set before"
                             " attempting to reload a certificate.")

    # pylint: disable=W0603
    # due to global usage
    global _rapi_ca
    global _rapi_client

    master = qa_config.GetMasterNode()

    # Load RAPI certificate from master node
    cmd = [
        "openssl", "x509", "-in",
        qa_utils.MakeNodePath(master, pathutils.RAPI_CERT_FILE)
    ]

    # Write to temporary file
    _rapi_ca = tempfile.NamedTemporaryFile()
    _rapi_ca.write(
        qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)))
    _rapi_ca.flush()

    port = qa_config.get("rapi-port", default=constants.DEFAULT_RAPI_PORT)
    cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name, proxy="")

    if qa_config.UseVirtualCluster():
        # TODO: Implement full support for RAPI on virtual clusters
        print qa_logging.FormatWarning(
            "RAPI tests are not yet supported on"
            " virtual clusters and will be disabled")

        assert _rapi_client is None
    else:
        _rapi_client = rapi.client.GanetiRapiClient(master.primary,
                                                    port=port,
                                                    username=_rapi_username,
                                                    password=_rapi_password,
                                                    curl_config_fn=cfg_curl)

        print "RAPI protocol version: %s" % _rapi_client.GetVersion()
Example #21
def GetObjectInfo(infocmd):
    """Get and parse information about a Ganeti object.

    @type infocmd: list of strings
    @param infocmd: command to be executed, e.g. ["gnt-cluster", "info"]
    @return: the information parsed, appropriately stored in dictionaries,
        lists...

    """
    master = qa_config.GetMasterNode()
    cmdline = utils.ShellQuoteArgs(infocmd)
    info_out = GetCommandOutput(master.primary, cmdline)
    return yaml.load(info_out)
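
Hypothetical usage; the exact key depends on the YAML layout the command prints:

info = GetObjectInfo(["gnt-cluster", "info"])
print "Cluster is named %s" % info["Cluster name"]  # key name assumed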
Example #22
def _ReadSsconfInstanceList():
    """Reads ssconf_instance_list from the master node.

  """
    master = qa_config.GetMasterNode()

    ssconf_path = utils.PathJoin(pathutils.DATA_DIR,
                                 "ssconf_%s" % constants.SS_INSTANCE_LIST)

    cmd = ["cat", qa_utils.MakeNodePath(master, ssconf_path)]

    return qa_utils.GetCommandOutput(master.primary,
                                     utils.ShellQuoteArgs(cmd)).splitlines()
Example #23
def _InstanceRunning(name):
    """Checks whether an instance is running.

    @param name: full name of the instance

    """
    master = qa_config.GetMasterNode()

    cmd = (
        utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) +
        ' | grep running')
    ret = StartSSH(master.primary, cmd).wait()
    return ret == 0
Example #24
def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes):
    """gnt-cluster verify-disks with broken DRBD"""
    qa_daemon.TestPauseWatcher()

    try:
        info = qa_instance.GetInstanceInfo(instance.name)
        snode = inst_nodes[1]
        for idx, minor in enumerate(info["drbd-minors"][snode.primary]):
            if idx % 2 == 0:
                break_drbd_cmd = \
                  "(drbdsetup %d down >/dev/null 2>&1;" \
                  " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \
                  (minor, minor)
            else:
                break_drbd_cmd = \
                  "(drbdsetup %d detach >/dev/null 2>&1;" \
                  " drbdsetup detach %d >/dev/null 2>&1) || /bin/true" % \
                  (minor, minor)
            AssertCommand(break_drbd_cmd, node=snode)

        verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
                                         "gnt-cluster verify-disks")
        activation_msg = "Activating disks for instance '%s'" % instance.name
        if activation_msg not in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks did not activate broken"
                " DRBD disks:\n%s" % verify_output)

        verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
                                         "gnt-cluster verify-disks")
        if activation_msg in verify_output:
            raise qa_error.Error(
                "gnt-cluster verify-disks wants to activate broken"
                " DRBD disks on second attempt:\n%s" % verify_output)

        AssertCommand(_CLUSTER_VERIFY)
    finally:
        qa_daemon.TestResumeWatcher()
Example #25
def _List(listcmd, fields, names):
  """Runs a list command.

  """
  master = qa_config.GetMasterNode()

  cmd = [listcmd, "list", "--separator=|", "--no-headers",
         "--output", ",".join(fields)]

  if names:
    cmd.extend(names)

  return GetCommandOutput(master.primary,
                          utils.ShellQuoteArgs(cmd)).splitlines()
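
Hypothetical usage: list every node's name and primary instance count ("pinst_cnt" is a standard gnt-node output field):

for line in _List("gnt-node", ["name", "pinst_cnt"], []):
  (name, pinst_cnt) = line.split("|")
  print "%s has %s primary instance(s)" % (name, pinst_cnt)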
Example #26
def GetOutputFromMaster(cmd, use_multiplexer=True, log_cmd=True):
  """ Gets the output of a command executed on master.

  """
  if isinstance(cmd, basestring):
    cmdstr = cmd
  else:
    cmdstr = utils.ShellQuoteArgs(cmd)

  # Necessary due to the stderr stream not being captured properly on the
  # buildbot
  cmdstr += " 2>&1"

  return GetCommandOutput(qa_config.GetMasterNode().primary, cmdstr,
                          use_multiplexer=use_multiplexer, log_cmd=log_cmd)
Example #27
def TestClusterMasterFailover():
    """gnt-cluster master-failover"""
    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    cmd = ["gnt-cluster", "master-failover"]
    node_list_cmd = ["gnt-node", "list"]
    try:
        AssertCommand(cmd, node=failovermaster)
        AssertCommand(node_list_cmd, node=failovermaster)
        # Back to original master node
        AssertCommand(cmd, node=master)
        AssertCommand(node_list_cmd, node=master)
    finally:
        failovermaster.Release()
Example #28
def _GetInstanceField(instance, field):
    """Get the value of a field of an instance.

    @type instance: string
    @param instance: Instance name
    @type field: string
    @param field: Name of the field
    @rtype: string

    """
    master = qa_config.GetMasterNode()
    infocmd = utils.ShellQuoteArgs([
        "gnt-instance", "list", "--no-headers", "--units", "m", "-o", field,
        instance
    ])
    return qa_utils.GetCommandOutput(master.primary, infocmd).strip()
Example #29
def _CreateOobScriptStructure():
  """Create a simple OOB handling script and its structure."""
  master = qa_config.GetMasterNode()

  data_path = qa_utils.UploadData(master.primary, "")
  verify_path = qa_utils.UploadData(master.primary, "")
  exit_code_path = qa_utils.UploadData(master.primary, "")

  oob_script = (("#!/bin/bash\n"
                 "echo \"$@\" > %s\n"
                 "cat %s\n"
                 "exit $(< %s)\n") %
                (utils.ShellQuote(verify_path), utils.ShellQuote(data_path),
                 utils.ShellQuote(exit_code_path)))
  oob_path = qa_utils.UploadData(master.primary, oob_script, mode=0700)

  return [oob_path, verify_path, data_path, exit_code_path]
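
Hypothetical flow tying this together with _AssertOobCall from Example #13; the "gnt-node power" syntax and the "power-on" argument string are assumptions:

master = qa_config.GetMasterNode()
node = qa_config.AcquireNode(exclude=master)
try:
  (oob_path, verify_path, data_path, exit_code_path) = \
      _CreateOobScriptStructure()
  # Make the fake OOB script report success
  qa_utils.UploadData(master.primary, "0", filename=exit_code_path)
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "oob_program=%s" % oob_path])
  AssertCommand(["gnt-node", "power", "on", node.primary])
  _AssertOobCall(verify_path, "power-on %s" % node.primary)
finally:
  node.Release()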
Example #30
def TestInstanceReboot(instance):
    """gnt-instance reboot"""
    options = qa_config.get("options", {})
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)
    name = instance.name
    for rtype in reboot_types:
        AssertCommand(["gnt-instance", "reboot", "--type=%s" % rtype, name])

    AssertCommand(["gnt-instance", "shutdown", name])
    qa_utils.RunInstanceCheck(instance, False)
    AssertCommand(["gnt-instance", "reboot", name])

    master = qa_config.GetMasterNode()
    cmd = ["gnt-instance", "list", "--no-headers", "-o", "status", name]
    result_output = qa_utils.GetCommandOutput(master.primary,
                                              utils.ShellQuoteArgs(cmd))
    AssertEqual(result_output.strip(), constants.INSTST_RUNNING)