コード例 #1
0
 def test(self):
   # Cross-check, for every hypervisor in constants.HVC_DEFAULTS, the
   # default values against the PARAMETERS of the hypervisor class:
   #   - every default must name a permitted parameter
   #   - every permitted parameter must have a default
   # (Kept as a comment on purpose: a docstring would change the
   # description unittest prints for this test in verbose mode.)
   unknown_fmt = ("Hypervisor %s: parameter %s defined in constants"
                  " but not in the permitted hypervisor parameters")
   nodefault_fmt = ("Hypervisor %s: parameter %s defined in the hypervisor"
                    " but missing a default value")

   for hv_name, defaults in constants.HVC_DEFAULTS.items():
     hv_class = hypervisor.GetHypervisorClass(hv_name)

     # Pass 1: defaults that the hypervisor does not know about
     for param in defaults:
       is_permitted = param in hv_class.PARAMETERS
       self.assertTrue(is_permitted, unknown_fmt % (hv_name, param))

     # Pass 2: hypervisor parameters lacking a default
     for param in hv_class.PARAMETERS:
       has_default = param in defaults
       self.assertTrue(has_default, nodefault_fmt % (hv_name, param))
コード例 #2
0
def GetInstanceConsole(cluster, instance, primary_node, node_group):
    """Build the console description of an instance as a dictionary.

    The hypervisor class matching C{instance.hypervisor} is asked for the
    console object; C{Validate()} is called on it and its C{ToDict()} form
    is returned.

    @type cluster: L{objects.Cluster}
    @type instance: L{objects.Instance}
    @type primary_node: L{objects.Node}
    @type node_group: L{objects.NodeGroup}
    @rtype: dict

    """
    hv_class = hypervisor.GetHypervisorClass(instance.hypervisor)

    # The filled hypervisor and backend parameters travel as separate
    # arguments, so the instance object is never edited and the defaults
    # cannot end up being saved in the instance itself.
    filled_hv = cluster.FillHV(instance)
    filled_be = cluster.FillBE(instance)

    result = hv_class.GetInstanceConsole(
        instance, primary_node, node_group, filled_hv, filled_be)

    # The returned console must refer to the instance we asked about
    assert result.instance == instance.name
    result.Validate()

    return result.ToDict()
コード例 #3
0
    def CheckPrereq(self):
        """Check prerequisites.

        Loads the locked instance from the configuration and validates any
        hypervisor parameter overrides given in the opcode. Unless the
        primary node is offline and offline nodes are to be ignored, the
        primary node is then checked: it must be online, the instance's
        bridges must exist, and the instance information is queried to
        decide whether a cleanup is required (instance reported as shut
        down by the user) or whether free memory must be verified
        (instance not running).

        Sets C{self.instance} and C{self.primary_offline}, and, when the
        primary node is checked, C{self.requires_cleanup}.

        """
        self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid)
        assert self.instance is not None, (
            "Cannot retrieve locked instance %s" % self.op.instance_name)

        cluster = self.cfg.GetClusterInfo()

        # One-off hypervisor parameters supplied with the opcode
        if self.op.hvparams:
            # local syntax check of the overrides first
            utils.ForceDictType(self.op.hvparams,
                                constants.HVS_PARAMETER_TYPES)
            hv_params = cluster.FillHV(self.instance)
            hv_params.update(self.op.hvparams)
            hv_class = hypervisor.GetHypervisorClass(self.instance.hypervisor)
            hv_class.CheckParameterSyntax(hv_params)
            CheckHVParams(self,
                          self.cfg.GetInstanceNodes(self.instance.uuid),
                          self.instance.hypervisor,
                          hv_params)

        CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

        primary_info = self.cfg.GetNodeInfo(self.instance.primary_node)
        self.primary_offline = primary_info.offline

        if self.primary_offline and self.op.ignore_offline_nodes:
            # Nothing can be verified on an offline node; warn and stop here
            self.LogWarning("Ignoring offline primary node")
            if self.op.hvparams or self.op.beparams:
                self.LogWarning("Overridden parameters are ignored")
            return

        CheckNodeOnline(self, self.instance.primary_node)

        be_params = self.cfg.GetClusterInfo().FillBE(self.instance)
        be_params.update(self.op.beparams)

        # the bridges used by the instance have to exist
        CheckInstanceBridgesExist(self, self.instance)

        info_result = self.rpc.call_instance_info(
            self.instance.primary_node,
            self.instance.name,
            self.instance.hypervisor,
            cluster.hvparams[self.instance.hypervisor])
        info_result.Raise(
            "Error checking node %s" %
            self.cfg.GetNodeName(self.instance.primary_node),
            prereq=True,
            ecode=errors.ECODE_ENVIRON)

        self.requires_cleanup = False

        if not info_result.payload:
            # not running already, so the node needs room for it
            CheckNodeFreeMemory(
                self,
                self.instance.primary_node,
                "starting instance %s" % self.instance.name,
                be_params[constants.BE_MINMEM],
                self.instance.hypervisor,
                self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
        elif _IsInstanceUserDown(self.cfg.GetClusterInfo(),
                                 self.instance,
                                 info_result.payload):
            self.requires_cleanup = True
コード例 #4
0
ファイル: burnin.py プロジェクト: badp/ganeti
        if not found:
            Err("OS '%s' not found" % self.opts.os)

        cluster_info = self.cl.QueryClusterInfo()
        self.cluster_info = cluster_info
        if not self.cluster_info:
            Err("Can't get cluster info")

        default_nic_params = self.cluster_info["nicparams"][
            constants.PP_DEFAULT]
        self.cluster_default_nicparams = default_nic_params
        if self.hypervisor is None:
            self.hypervisor = self.cluster_info["default_hypervisor"]
        self.hv_can_migrate = \
          hypervisor.GetHypervisorClass(self.hypervisor).CAN_MIGRATE

    @_DoCheckInstances
    @_DoBatch(False)
    def BurnCreateInstances(self):
        """Create the given instances.

    """
        self.to_rem = []
        mytor = izip(cycle(self.nodes), islice(cycle(self.nodes), 1, None),
                     self.instances)

        Log("Creating instances")
        for pnode, snode, instance in mytor:
            Log("instance %s", instance, indent=1)
            if self.opts.iallocator:
コード例 #5
0
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    Before the actual migration, the hypervisor versions reported by the
    source and target nodes are compared, the disk consistency on the target
    node is checked and, if the instance does not fit into the target's free
    memory and runtime changes are allowed, the instance memory is ballooned
    down to the target's free memory.

    After the memory transfer has ended, the migration is finalized on both
    nodes and only then the target node is recorded as the instance's
    primary node in the configuration.

    @raise errors.OpExecError: if one of the pre-checks fails, or if the
        migration cannot be prepared, started, completed or finalized

    """
    # Check for hypervisor version mismatch and warn the user.
    hvspecs = [(self.instance.hypervisor,
                self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])]
    nodeinfo = self.rpc.call_node_info(
                 [self.source_node_uuid, self.target_node_uuid], None, hvspecs)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    # Each payload is unpacked as a 3-tuple whose last element holds exactly
    # one entry (a single hypervisor spec was requested above)
    (_, _, (src_info, )) = nodeinfo[self.source_node_uuid].payload
    (_, _, (dst_info, )) = nodeinfo[self.target_node_uuid].payload

    # The versions can only be compared if both nodes reported one
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))
        hv = hypervisor.GetHypervisorClass(self.instance.hypervisor)
        if hv.VersionsSafeForMigration(src_version, dst_version):
          self.feedback_fn("  migrating from hypervisor version %s to %s should"
                           " be safe" % (src_version, dst_version))
        else:
          self.feedback_fn("  migrating from hypervisor version %s to %s is"
                           " likely unsupported" % (src_version, dst_version))
          # An unsafe version combination aborts the migration unless the
          # caller explicitly asked to ignore hypervisor versions
          if self.ignore_hvversions:
            self.feedback_fn("  continuing anyway (told to ignore version"
                             " mismatch)")
          else:
            raise errors.OpExecError("Unsupported migration between hypervisor"
                                     " versions (%s to %s)" %
                                     (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(self.cfg.GetInstanceDisks(self.instance.uuid)):
      if not CheckDiskConsistency(self.lu, self.instance, dev,
                                  self.target_node_uuid,
                                  False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    # If the instance currently uses more memory than is free on the target,
    # either give up (runtime changes not allowed) or balloon the instance
    # down to the target's free memory; the balloon request is sent to the
    # instance's current primary node
    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (self.instance.name,
                                  self.cfg.GetNodeName(self.target_node_uuid),
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(self.instance.primary_node,
                                                     self.instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(self.source_node_uuid, self.instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (self.cfg.GetNodeName(self.source_node_uuid), msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Kept on the object as well as in a local; the local is passed to the
    # accept and finalize calls below
    self.migration_info = migration_info = result.payload

    disks = self.cfg.GetInstanceDisks(self.instance.uuid)

    self._CloseInstanceDisks(self.target_node_uuid)

    if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
      # Then switch the disks to master/master mode
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    # Open the disks on both nodes for the duration of the migration
    # NOTE(review): the meaning of the second argument (False) is defined by
    # _OpenInstanceDisks, which is not visible here - confirm
    self._OpenInstanceDisks(self.source_node_uuid, False)
    self._OpenInstanceDisks(self.target_node_uuid, False)

    self.feedback_fn("* preparing %s to accept the instance" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    result = self.rpc.call_accept_instance(self.target_node_uuid,
                                           self.instance,
                                           migration_info,
                                           self.nodes_ip[self.target_node_uuid])

    msg = result.fail_msg
    if msg:
      # The target refused: abort the migration and revert the disk status
      # before reporting the error
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (self.instance.name, msg))

    self.feedback_fn("* migrating instance to %s" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    cluster = self.cfg.GetClusterInfo()
    result = self.rpc.call_instance_migrate(
        self.source_node_uuid, cluster.cluster_name, self.instance,
        self.nodes_ip[self.target_node_uuid], self.live)
    msg = result.fail_msg
    if msg:
      # Same recovery as for a failed pre-migration
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (self.instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()

    # The instance-level migration capabilities take precedence; the value
    # from the cluster's "kvm" hypervisor parameters (or an empty string) is
    # only the fallback
    cluster_migration_caps = \
      cluster.hvparams.get("kvm", {}).get(constants.HV_KVM_MIGRATION_CAPS, "")
    migration_caps = \
      self.instance.hvparams.get(constants.HV_KVM_MIGRATION_CAPS,
                                 cluster_migration_caps)
    # migration_caps is a ':' delimited string, so checking
    # if 'postcopy-ram' is a substring also covers using
    # x-postcopy-ram for QEMU 2.5
    postcopy_enabled = "postcopy-ram" in migration_caps
    # Poll the source node until the migration is no longer active; the loop
    # is left either via "break" (transfer ended) or via an exception
    while True:
      result = self.rpc.call_instance_get_migration_status(
                 self.source_node_uuid, self.instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      # Both a failed RPC and a failure status reported by the hypervisor
      # abort the migration
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (self.instance.name, msg))

      # With postcopy enabled, request postcopy once the dirty sync count
      # has reached the threshold while the migration is still active
      # NOTE(review): the result of the postcopy RPC is not checked
      if (postcopy_enabled
          and ms.status == constants.HV_MIGRATION_ACTIVE
          and int(ms.dirty_sync_count) >= self._POSTCOPY_SYNC_COUNT_THRESHOLD):
        self.feedback_fn("* finishing memory transfer with postcopy")
        self.rpc.call_instance_start_postcopy(self.source_node_uuid,
                                              self.instance)

      # KVM has a dedicated set of statuses which count as "still active";
      # for all other hypervisors only HV_MIGRATION_ACTIVE does
      if self.instance.hypervisor == 'kvm':
        migration_active = \
          ms.status in constants.HV_KVM_MIGRATION_ACTIVE_STATUSES
      else:
        migration_active = \
          ms.status == constants.HV_MIGRATION_ACTIVE
      if not migration_active:
        self.feedback_fn("* memory transfer complete")
        break

      # Report progress at most once per feedback interval, and only if the
      # hypervisor provided the amount of transferred RAM
      # NOTE(review): a total_ram of 0 would raise ZeroDivisionError here -
      # confirm this cannot happen
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    # Always call finalize on both source and target, they should compose
    # a single operation, consisting of (potentially) parallel steps, that
    # should be always attempted/retried together (like in _AbortMigration)
    # without setting any expectations in what order they execute.
    result_src = self.rpc.call_instance_finalize_migration_src(
        self.source_node_uuid, self.instance, True, self.live)

    result_dst = self.rpc.call_instance_finalize_migration_dst(
        self.target_node_uuid, self.instance, migration_info, True)

    # Collect the failures of both finalize calls so that a single error
    # reports all of them
    err_msg = []
    if result_src.fail_msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", result_src.fail_msg)
      err_msg.append(self.cfg.GetNodeName(self.source_node_uuid) + ': '
                     + result_src.fail_msg)

    if result_dst.fail_msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", result_dst.fail_msg)
      err_msg.append(self.cfg.GetNodeName(self.target_node_uuid) + ': '
                     + result_dst.fail_msg)

    if err_msg:
      raise errors.OpExecError(
          "Could not finalize instance migration: %s" % ' '.join(err_msg))

    # Update instance location only after finalize completed. This way, if
    # either finalize fails, the config still stores the old primary location,
    # so we can know which instance to delete if we need to (manually) clean up.
    self.cfg.SetInstancePrimaryNode(self.instance.uuid, self.target_node_uuid)
    # Re-read the instance so that self.instance reflects the new primary
    self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)

    self._CloseInstanceDisks(self.source_node_uuid)
    disks = self.cfg.GetInstanceDisks(self.instance_uuid)
    if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
      # Internally mirrored disks: sync, then reconnect (this time with
      # False, as opposed to True before the migration) and sync again
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    elif utils.AnyDiskOfType(disks, constants.DTS_EXT_MIRROR):
      # Externally mirrored disks are only re-opened on the target node
      self._OpenInstanceDisks(self.target_node_uuid, True)

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    unmap_types = (constants.DT_RBD, constants.DT_EXT)

    if utils.AnyDiskOfType(disks, unmap_types):
      unmap_disks = [d for d in disks if d.dev_type in unmap_types]
      # NOTE(review): the same list is passed for both arguments - confirm
      # against the contract of ExpandCheckDisks
      disks = ExpandCheckDisks(unmap_disks, unmap_disks)
      self.feedback_fn("* unmapping instance's disks %s from %s" %
                       (utils.CommaJoin(d.name for d in unmap_disks),
                        self.cfg.GetNodeName(self.source_node_uuid)))
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(self.source_node_uuid,
                                                 (disk, self.instance))
        msg = result.fail_msg
        # A failed unmap is only logged: the migration itself has already
        # succeeded, so no exception is raised here
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid), msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid))

    self.feedback_fn("* done")
コード例 #6
0
def ComputeAncillaryFiles(cluster, redist):
    """Compute files external to Ganeti which need to be consistent.

    With C{redist} set, only the RAPI certificate is added to the base
    list of files for all nodes. Without it, all certificate files, the
    ssconf file list, the cluster configuration (master candidates only)
    and, if file or shared file storage is enabled, the file storage paths
    file are included as well.

    @type redist: boolean
    @param redist: Whether to include files which need to be redistributed
    @rtype: tuple
    @return: four sets of file names: files for all nodes, optional files,
        files for master candidates only, files for VM-capable nodes only

    """
    # Files expected on every node
    on_all_nodes = set((
        pathutils.SSH_KNOWN_HOSTS_FILE,
        pathutils.CONFD_HMAC_KEY,
        pathutils.CLUSTER_DOMAIN_SECRET_FILE,
        pathutils.SPICE_CERT_FILE,
        pathutils.SPICE_CACERT_FILE,
        pathutils.RAPI_USERS_FILE,
    ))

    if not redist:
        on_all_nodes.update(pathutils.ALL_CERT_FILES)
        on_all_nodes.update(ssconf.SimpleStore().GetFileList())
    else:
        # we need to ship at least the RAPI certificate
        on_all_nodes.add(pathutils.RAPI_CERT_FILE)

    if cluster.modify_etc_hosts:
        on_all_nodes.add(pathutils.ETC_HOSTS)

    if cluster.use_external_mip_script:
        on_all_nodes.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    # Optional files; each of them must
    # - also be listed in one of the other categories
    # - either exist or not exist on all nodes of that category (mc, vm all)
    optional = set((pathutils.RAPI_USERS_FILE,))

    # Files restricted to master candidates
    on_candidates = set()

    if not redist:
        on_candidates.add(pathutils.CLUSTER_CONF_FILE)

        # File storage
        if (cluster.IsFileStorageEnabled()
                or cluster.IsSharedFileStorageEnabled()):
            on_all_nodes.add(pathutils.FILE_STORAGE_PATHS_FILE)
            optional.add(pathutils.FILE_STORAGE_PATHS_FILE)

    # Files restricted to VM-capable nodes: the first element of what each
    # enabled hypervisor reports as its ancillary files
    on_vm_capable = set()
    for hv_name in cluster.enabled_hypervisors:
        hv_class = hypervisor.GetHypervisorClass(hv_name)
        on_vm_capable.update(hv_class.GetAncillaryFiles()[0])

    # The second element lists the hypervisor's optional files
    for hv_name in cluster.enabled_hypervisors:
        hv_class = hypervisor.GetHypervisorClass(hv_name)
        optional.update(hv_class.GetAncillaryFiles()[1])

    # A file name may show up in one category only: the union is as large
    # as the sum of the individual sizes exactly when nothing overlaps
    combined = on_all_nodes | on_candidates | on_vm_capable
    assert (len(combined) ==
            len(on_all_nodes) + len(on_candidates) + len(on_vm_capable)), \
      "Found file listed in more than one file list"

    # Optional files must be present in one other category
    assert optional <= combined, \
      "Optional file not in a different required list"

    # This one file should never ever be re-distributed via RPC
    assert not redist or pathutils.FILE_STORAGE_PATHS_FILE not in combined

    return (on_all_nodes, optional, on_candidates, on_vm_capable)