  def test(self):
    for hv, const_params in constants.HVC_DEFAULTS.items():
      hyp = hypervisor.GetHypervisorClass(hv)
      for pname in const_params:
        self.assertTrue(pname in hyp.PARAMETERS,
                        "Hypervisor %s: parameter %s defined in constants"
                        " but not in the permitted hypervisor parameters" %
                        (hv, pname))
      for pname in hyp.PARAMETERS:
        self.assertTrue(pname in const_params,
                        "Hypervisor %s: parameter %s defined in the hypervisor"
                        " but missing a default value" %
                        (hv, pname))
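  # A compact variant of the same invariant (a sketch, not part of the
  # original suite): the two loops above together require that the keys of
  # HVC_DEFAULTS[hv] and hyp.PARAMETERS agree as sets, which assertEqual on
  # sets can state directly, at the cost of less specific failure messages.
  def testParameterSetsAgree(self):
    for hv, const_params in constants.HVC_DEFAULTS.items():
      hyp = hypervisor.GetHypervisorClass(hv)
      self.assertEqual(set(const_params), set(hyp.PARAMETERS),
                       "Hypervisor %s: defaults and permitted parameters"
                       " disagree" % hv)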
def GetInstanceConsole(cluster, instance, primary_node, node_group):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @type primary_node: L{objects.Node}
  @type node_group: L{objects.NodeGroup}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, primary_node, node_group,
                                     hvparams, beparams)

  assert console.instance == instance.name
  console.Validate()

  return console.ToDict()
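# Usage sketch (hypothetical caller, not part of the original module): the
# function expects already-resolved config objects, which an LU would
# typically obtain from the configuration. "cfg" stands for a
# config.ConfigWriter instance and "instance_uuid" for a known UUID; both
# are assumptions for illustration.
#
#   cluster = cfg.GetClusterInfo()
#   instance = cfg.GetInstanceInfo(instance_uuid)
#   primary_node = cfg.GetNodeInfo(instance.primary_node)
#   node_group = cfg.GetNodeGroup(primary_node.group)
#   console_dict = GetInstanceConsole(cluster, instance, primary_node,
#                                     node_group)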
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    cluster = self.cfg.GetClusterInfo()
    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(self.instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisorClass(self.instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      CheckHVParams(self, self.cfg.GetInstanceNodes(self.instance.uuid),
                    self.instance.hypervisor, filled_hvp)

    CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")
    else:
      CheckNodeOnline(self, self.instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(self.instance)
      bep.update(self.op.beparams)

      # check bridges existence
      CheckInstanceBridgesExist(self, self.instance)

      remote_info = self.rpc.call_instance_info(
          self.instance.primary_node, self.instance.name,
          self.instance.hypervisor,
          cluster.hvparams[self.instance.hypervisor])
      remote_info.Raise("Error checking node %s" %
                        self.cfg.GetNodeName(self.instance.primary_node),
                        prereq=True, ecode=errors.ECODE_ENVIRON)

      self.requires_cleanup = False

      if remote_info.payload:
        if _IsInstanceUserDown(self.cfg.GetClusterInfo(),
                               self.instance,
                               remote_info.payload):
          self.requires_cleanup = True
      else: # not running already
        CheckNodeFreeMemory(
            self, self.instance.primary_node,
            "starting instance %s" % self.instance.name,
            bep[constants.BE_MINMEM], self.instance.hypervisor,
            self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
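  # A minimal sketch of the parameter-override pattern used above
  # (illustration only; "cluster_defaults", "op_overrides" and "hv_class"
  # are hypothetical names): cluster-level defaults are filled in first,
  # then the per-opcode overrides are layered on top, and only the merged
  # result is syntax-checked, so partial overrides remain valid.
  #
  #   filled = dict(cluster_defaults)   # as from cluster.FillHV(instance)
  #   filled.update(op_overrides)       # as from self.op.hvparams
  #   hv_class.CheckParameterSyntax(filled)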
    if not found:
      Err("OS '%s' not found" % self.opts.os)

    cluster_info = self.cl.QueryClusterInfo()
    self.cluster_info = cluster_info
    if not self.cluster_info:
      Err("Can't get cluster info")

    default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT]
    self.cluster_default_nicparams = default_nic_params
    if self.hypervisor is None:
      self.hypervisor = self.cluster_info["default_hypervisor"]
    self.hv_can_migrate = \
      hypervisor.GetHypervisorClass(self.hypervisor).CAN_MIGRATE

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    # Check for hypervisor version mismatch and warn the user.
    hvspecs = [(self.instance.hypervisor,
                self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])]
    nodeinfo = self.rpc.call_node_info(
        [self.source_node_uuid, self.target_node_uuid], None, hvspecs)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[self.source_node_uuid].payload
    (_, _, (dst_info, )) = nodeinfo[self.target_node_uuid].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))
        hv = hypervisor.GetHypervisorClass(self.instance.hypervisor)
        if hv.VersionsSafeForMigration(src_version, dst_version):
          self.feedback_fn("  migrating from hypervisor version %s to %s"
                           " should be safe" % (src_version, dst_version))
        else:
          self.feedback_fn("  migrating from hypervisor version %s to %s is"
                           " likely unsupported" % (src_version, dst_version))
          if self.ignore_hvversions:
            self.feedback_fn("  continuing anyway (told to ignore version"
                             " mismatch)")
          else:
            raise errors.OpExecError("Unsupported migration between hypervisor"
                                     " versions (%s to %s)" %
                                     (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(self.cfg.GetInstanceDisks(self.instance.uuid)):
      if not CheckDiskConsistency(self.lu, self.instance, dev,
                                  self.target_node_uuid,
                                  False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (self.instance.name,
                                  self.cfg.GetNodeName(self.target_node_uuid),
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(
          self.instance.primary_node, self.instance, self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(self.source_node_uuid, self.instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (self.cfg.GetNodeName(self.source_node_uuid), msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    disks = self.cfg.GetInstanceDisks(self.instance.uuid)

    self._CloseInstanceDisks(self.target_node_uuid)

    if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
      # Then switch the disks to master/master mode
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self._OpenInstanceDisks(self.source_node_uuid, False)
    self._OpenInstanceDisks(self.target_node_uuid, False)

    self.feedback_fn("* preparing %s to accept the instance" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    result = self.rpc.call_accept_instance(self.target_node_uuid,
                                           self.instance,
                                           migration_info,
                                           self.nodes_ip[self.target_node_uuid])
    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (self.instance.name, msg))

    self.feedback_fn("* migrating instance to %s" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    cluster = self.cfg.GetClusterInfo()
    result = self.rpc.call_instance_migrate(
        self.source_node_uuid, cluster.cluster_name, self.instance,
        self.nodes_ip[self.target_node_uuid], self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (self.instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()

    cluster_migration_caps = \
      cluster.hvparams.get("kvm", {}).get(constants.HV_KVM_MIGRATION_CAPS, "")
    migration_caps = \
      self.instance.hvparams.get(constants.HV_KVM_MIGRATION_CAPS,
                                 cluster_migration_caps)
    # migration_caps is a ':' delimited string, so checking
    # if 'postcopy-ram' is a substring also covers using
    # x-postcopy-ram for QEMU 2.5
    postcopy_enabled = "postcopy-ram" in migration_caps
    while True:
      result = self.rpc.call_instance_get_migration_status(
          self.source_node_uuid, self.instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (self.instance.name, msg))

      if (postcopy_enabled
          and ms.status == constants.HV_MIGRATION_ACTIVE
          and int(ms.dirty_sync_count) >= self._POSTCOPY_SYNC_COUNT_THRESHOLD):
        self.feedback_fn("* finishing memory transfer with postcopy")
        self.rpc.call_instance_start_postcopy(self.source_node_uuid,
                                              self.instance)

      if self.instance.hypervisor == 'kvm':
        migration_active = \
          ms.status in constants.HV_KVM_MIGRATION_ACTIVE_STATUSES
      else:
        migration_active = \
          ms.status == constants.HV_MIGRATION_ACTIVE
      if not migration_active:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    # Always call finalize on both source and target, they should compose
    # a single operation, consisting of (potentially) parallel steps, that
    # should be always attempted/retried together (like in _AbortMigration)
    # without setting any expectations in what order they execute.
    result_src = self.rpc.call_instance_finalize_migration_src(
        self.source_node_uuid, self.instance, True, self.live)
    result_dst = self.rpc.call_instance_finalize_migration_dst(
        self.target_node_uuid, self.instance, migration_info, True)

    err_msg = []
    if result_src.fail_msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", result_src.fail_msg)
      err_msg.append(self.cfg.GetNodeName(self.source_node_uuid) + ': ' +
                     result_src.fail_msg)
    if result_dst.fail_msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", result_dst.fail_msg)
      err_msg.append(self.cfg.GetNodeName(self.target_node_uuid) + ': ' +
                     result_dst.fail_msg)
    if err_msg:
      raise errors.OpExecError(
          "Could not finalize instance migration: %s" % ' '.join(err_msg))

    # Update instance location only after finalize completed. This way, if
    # either finalize fails, the config still stores the old primary location,
    # so we can know which instance to delete if we need to (manually) clean up.
    self.cfg.SetInstancePrimaryNode(self.instance.uuid, self.target_node_uuid)
    self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)

    self._CloseInstanceDisks(self.source_node_uuid)
    disks = self.cfg.GetInstanceDisks(self.instance_uuid)

    if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    elif utils.AnyDiskOfType(disks, constants.DTS_EXT_MIRROR):
      self._OpenInstanceDisks(self.target_node_uuid, True)

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    unmap_types = (constants.DT_RBD, constants.DT_EXT)

    if utils.AnyDiskOfType(disks, unmap_types):
      unmap_disks = [d for d in disks if d.dev_type in unmap_types]
      disks = ExpandCheckDisks(unmap_disks, unmap_disks)
      self.feedback_fn("* unmapping instance's disks %s from %s" %
                       (utils.CommaJoin(d.name for d in unmap_disks),
                        self.cfg.GetNodeName(self.source_node_uuid)))
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(self.source_node_uuid,
                                                 (disk, self.instance))
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid), msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid))

    self.feedback_fn("* done")
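  # Sketch (illustrative helper, not in the original module): the finalize
  # step above deliberately collects failures from both nodes before
  # raising, rather than aborting on the first error. The same aggregation
  # pattern in isolation, with hypothetical names:
  def _ExampleCollectFailures(self, named_results):
    """Join fail messages from (node_name, rpc_result) pairs."""
    return ' '.join("%s: %s" % (name, res.fail_msg)
                    for (name, res) in named_results if res.fail_msg)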
def ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)

  # File storage
  if (not redist and (cluster.IsFileStorageEnabled() or
                      cluster.IsSharedFileStorageEnabled())):
    files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
    files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category as well
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)

  return (files_all, files_opt, files_mc, files_vm)
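# Usage sketch (hypothetical, for illustration): callers unpack the four
# categories and combine them per node role. Roughly, master candidates
# receive files_all | files_mc, VM-capable nodes receive files_all |
# files_vm, and entries in files_opt are allowed to be absent on some
# nodes of their category.
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     ComputeAncillaryFiles(cluster, True)
#   mc_files = files_all | files_mc
#   vm_files = files_all | files_vm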