def drop_resize_claim(self, context, instance, instance_type=None,
                      image_meta=None, prefix='new_'):
    """Remove usage for an incoming/outgoing migration."""
    if instance['uuid'] in self.tracked_migrations:
        migration, itype = self.tracked_migrations.pop(instance['uuid'])

        if not instance_type:
            ctxt = context.elevated()
            instance_type = self._get_instance_type(ctxt, instance, prefix)

        if image_meta is None:
            image_meta = utils.get_image_from_system_metadata(
                instance['system_metadata'])

        if instance_type['id'] == itype['id']:
            numa_topology = hardware.numa_get_constraints(
                itype, image_meta)
            usage = self._get_usage_dict(itype, numa_topology=numa_topology)
            if self.pci_tracker:
                self.pci_tracker.update_pci_for_migration(context,
                                                          instance,
                                                          sign=-1)
            self._update_usage(context, self.compute_node, usage, sign=-1)

            ctxt = context.elevated()
            self._update(ctxt, self.compute_node)
def drop_move_claim(self, context, instance, instance_type=None,
                    image_meta=None, prefix='new_'):
    """Remove usage for an incoming/outgoing migration."""
    if instance['uuid'] in self.tracked_migrations:
        migration, itype = self.tracked_migrations.pop(instance['uuid'])

        if not instance_type:
            ctxt = context.elevated()
            instance_type = self._get_instance_type(ctxt, instance, prefix)

        if image_meta is None:
            image_meta = objects.ImageMeta.from_instance(instance)
        # TODO(jaypipes): Remove when image_meta is always passed
        # as an objects.ImageMeta
        elif not isinstance(image_meta, objects.ImageMeta):
            image_meta = objects.ImageMeta.from_dict(image_meta)

        if (instance_type is not None and
                instance_type.id == itype['id']):
            numa_topology = hardware.numa_get_constraints(
                itype, image_meta)
            usage = self._get_usage_dict(
                itype, numa_topology=numa_topology)
            if self.pci_tracker:
                self.pci_tracker.update_pci_for_migration(context,
                                                          instance,
                                                          sign=-1)
            self._update_usage(usage, sign=-1)

            ctxt = context.elevated()
            self._update(ctxt)
def _update_usage_from_migration(self, context, instance, image_meta,
                                 migration):
    """Update usage for a single migration.

    The record may represent an incoming or outbound migration.
    """
    uuid = migration.instance_uuid
    LOG.info(_LI("Updating from migration %s") % uuid)

    incoming = (migration.dest_compute == self.host and
                migration.dest_node == self.nodename)
    outbound = (migration.source_compute == self.host and
                migration.source_node == self.nodename)
    same_node = (incoming and outbound)

    record = self.tracked_instances.get(uuid, None)
    itype = None

    if same_node:
        # same node resize. record usage for whichever instance type the
        # instance is *not* in:
        if instance["instance_type_id"] == migration.old_instance_type_id:
            itype = self._get_instance_type(context, instance, "new_",
                                            migration.new_instance_type_id)
        else:
            # instance record already has new flavor, hold space for a
            # possible revert to the old instance type:
            itype = self._get_instance_type(context, instance, "old_",
                                            migration.old_instance_type_id)
    elif incoming and not record:
        # instance has not yet migrated here:
        itype = self._get_instance_type(context, instance, "new_",
                                        migration.new_instance_type_id)
    elif outbound and not record:
        # instance migrated, but record usage for a possible revert:
        itype = self._get_instance_type(context, instance, "old_",
                                        migration.old_instance_type_id)

    if image_meta is None:
        image_meta = objects.ImageMeta.from_instance(instance)
    # TODO(jaypipes): Remove when image_meta is always passed
    # as an objects.ImageMeta
    elif not isinstance(image_meta, objects.ImageMeta):
        image_meta = objects.ImageMeta.from_dict(image_meta)

    if itype:
        host_topology = self.compute_node.get("numa_topology")
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
        numa_topology = hardware.numa_get_constraints(itype, image_meta)
        numa_topology = hardware.numa_fit_instance_to_host(
            host_topology, numa_topology)
        usage = self._get_usage_dict(itype, numa_topology=numa_topology)
        if self.pci_tracker:
            self.pci_tracker.update_pci_for_migration(context, instance)
        self._update_usage(usage)
        if self.pci_tracker:
            obj = self.pci_tracker.stats.to_device_pools_obj()
            self.compute_node.pci_device_pools = obj
        else:
            obj = objects.PciDevicePoolList()
            self.compute_node.pci_device_pools = obj
        self.tracked_migrations[uuid] = (migration, itype)
def _get_instance_vnuma_config(self, instance, image_meta):
    """Returns the appropriate NUMA configuration for Hyper-V instances,
    given the desired instance NUMA topology.

    :param instance: instance containing the flavor and its extra_specs,
                     where the NUMA topology is defined.
    :param image_meta: image's metadata, containing properties related to
                       the instance's NUMA topology.
    :returns: memory amount and number of vCPUs per NUMA node or
              (None, None), if instance NUMA topology was not requested.
    :raises exception.InstanceUnacceptable: If the given instance NUMA
            topology is not possible on Hyper-V, or if CPU pinning is
            required.
    """
    instance_topology = hardware.numa_get_constraints(instance.flavor,
                                                      image_meta)
    if not instance_topology:
        # instance NUMA topology was not requested.
        return None, None

    memory_per_numa_node = instance_topology.cells[0].memory
    cpus_per_numa_node = len(instance_topology.cells[0].cpuset)

    # TODO(stephenfin): We can avoid this check entirely if we rely on the
    # 'supports_pcpus' driver capability (via a trait), but we need to
    # drop support for the legacy 'vcpu_pin_set' path in the libvirt
    # driver first
    if instance_topology.cpu_policy not in (
        None, fields.CPUAllocationPolicy.SHARED,
    ):
        raise exception.InstanceUnacceptable(
            reason=_("Hyper-V does not support CPU pinning."),
            instance_id=instance.uuid)

    # validate that the requested NUMA topology is not asymmetric.
    # e.g.: it should be like: (X cpus, X cpus, Y cpus), where X == Y.
    # same with memory.
    for cell in instance_topology.cells:
        if len(cell.cpuset) != cpus_per_numa_node:
            reason = _("Hyper-V does not support NUMA topologies with "
                       "uneven number of processors. (%(a)s != %(b)s)") % {
                'a': len(cell.cpuset), 'b': cpus_per_numa_node}
            raise exception.InstanceUnacceptable(reason=reason,
                                                 instance_id=instance.uuid)
        if cell.memory != memory_per_numa_node:
            reason = _("Hyper-V does not support NUMA topologies with "
                       "uneven amounts of memory. (%(a)s != %(b)s)") % {
                'a': cell.memory, 'b': memory_per_numa_node}
            raise exception.InstanceUnacceptable(reason=reason,
                                                 instance_id=instance.uuid)

    return memory_per_numa_node, cpus_per_numa_node
def _get_instance_vnuma_config(self, instance, image_meta):
    """Returns the appropriate NUMA configuration for Hyper-V instances,
    given the desired instance NUMA topology.

    :param instance: instance containing the flavor and its extra_specs,
                     where the NUMA topology is defined.
    :param image_meta: image's metadata, containing properties related to
                       the instance's NUMA topology.
    :returns: memory amount and number of vCPUs per NUMA node or
              (None, None), if instance NUMA topology was not requested.
    :raises exception.InstanceUnacceptable: If the given instance NUMA
            topology is not possible on Hyper-V, or if CPU pinning is
            required.
    """
    instance_topology = hardware.numa_get_constraints(
        instance.flavor, image_meta)
    if not instance_topology:
        # instance NUMA topology was not requested.
        return None, None

    memory_per_numa_node = instance_topology.cells[0].memory
    cpus_per_numa_node = len(instance_topology.cells[0].cpuset)

    if instance_topology.cpu_pinning_requested:
        raise exception.InstanceUnacceptable(
            reason=_("Hyper-V does not support CPU pinning."),
            instance_id=instance.uuid)

    # validate that the requested NUMA topology is not asymmetric.
    # e.g.: it should be like: (X cpus, X cpus, Y cpus), where X == Y.
    # same with memory.
    for cell in instance_topology.cells:
        if len(cell.cpuset) != cpus_per_numa_node:
            reason = _("Hyper-V does not support NUMA topologies with "
                       "uneven number of processors. (%(a)s != %(b)s)") % {
                'a': len(cell.cpuset), 'b': cpus_per_numa_node}
            raise exception.InstanceUnacceptable(reason=reason,
                                                 instance_id=instance.uuid)
        if cell.memory != memory_per_numa_node:
            reason = _("Hyper-V does not support NUMA topologies with "
                       "uneven amounts of memory. (%(a)s != %(b)s)") % {
                'a': cell.memory, 'b': memory_per_numa_node}
            raise exception.InstanceUnacceptable(reason=reason,
                                                 instance_id=instance.uuid)

    return memory_per_numa_node, cpus_per_numa_node
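# A minimal, self-contained sketch of the symmetry rule the Hyper-V
# helpers above enforce. `Cell` and `is_symmetric` are illustrative
# stand-ins, not Nova APIs: every NUMA cell must carry the same vCPU
# count and the same memory amount, or the request is unacceptable.
from collections import namedtuple

Cell = namedtuple('Cell', ['cpuset', 'memory'])

def is_symmetric(cells):
    cpus = len(cells[0].cpuset)
    mem = cells[0].memory
    return all(len(c.cpuset) == cpus and c.memory == mem for c in cells)

# (4 vCPUs, 2048 MB) on both nodes -> accepted
print(is_symmetric([Cell({0, 1, 2, 3}, 2048), Cell({4, 5, 6, 7}, 2048)]))
# uneven vCPU counts -> would raise InstanceUnacceptable above
print(is_symmetric([Cell({0, 1, 2}, 2048), Cell({3, 4, 5, 6}, 2048)]))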
def _update_usage_from_migration(self, context, instance, image_meta,
                                 resources, migration):
    """Update usage for a single migration.

    The record may represent an incoming or outbound migration.
    """
    uuid = migration.instance_uuid
    LOG.info(_LI("Updating from migration %s") % uuid)

    incoming = (migration.dest_compute == self.host and
                migration.dest_node == self.nodename)
    outbound = (migration.source_compute == self.host and
                migration.source_node == self.nodename)
    same_node = (incoming and outbound)

    record = self.tracked_instances.get(uuid, None)
    itype = None

    if same_node:
        # same node resize. record usage for whichever instance type the
        # instance is *not* in:
        if instance["instance_type_id"] == migration.old_instance_type_id:
            itype = self._get_instance_type(context, instance, "new_",
                                            migration.new_instance_type_id)
        else:
            # instance record already has new flavor, hold space for a
            # possible revert to the old instance type:
            itype = self._get_instance_type(context, instance, "old_",
                                            migration.old_instance_type_id)
    elif incoming and not record:
        # instance has not yet migrated here:
        itype = self._get_instance_type(context, instance, "new_",
                                        migration.new_instance_type_id)
    elif outbound and not record:
        # instance migrated, but record usage for a possible revert:
        itype = self._get_instance_type(context, instance, "old_",
                                        migration.old_instance_type_id)

    if image_meta is None:
        image_meta = utils.get_image_from_system_metadata(
            instance["system_metadata"])

    if itype:
        host_topology = resources.get("numa_topology")
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
        numa_topology = hardware.numa_get_constraints(itype, image_meta)
        numa_topology = hardware.numa_fit_instance_to_host(
            host_topology, numa_topology)
        usage = self._get_usage_dict(itype, numa_topology=numa_topology)
        if self.pci_tracker:
            self.pci_tracker.update_pci_for_migration(context, instance)
        self._update_usage(context, resources, usage)
        if self.pci_tracker:
            resources["pci_device_pools"] = self.pci_tracker.stats
        else:
            resources["pci_device_pools"] = []
        self.tracked_migrations[uuid] = (migration, itype)
def _get_instance_vnuma_config(self, instance, image_meta):
    """Returns the appropriate NUMA configuration for Hyper-V instances,
    given the desired instance NUMA topology.

    :param instance: instance containing the flavor and its extra_specs,
                     where the NUMA topology is defined.
    :param image_meta: image's metadata, containing properties related to
                       the instance's NUMA topology.
    :returns: memory amount and number of vCPUs per NUMA node or
              (None, None), if instance NUMA topology was not requested.
    :raises exception.InstanceUnacceptable: If the given instance NUMA
            topology is not possible on Hyper-V.
    """
    image_meta = objects.ImageMeta.from_dict(image_meta)
    instance_topology = hardware.numa_get_constraints(
        instance.flavor, image_meta)
    if not instance_topology:
        # instance NUMA topology was not requested.
        return None, None

    memory_per_numa_node = instance_topology.cells[0].memory
    cpus_per_numa_node = len(instance_topology.cells[0].cpuset)
    cpus_pinned = instance_topology.cells[0].cpu_pinning is not None

    if cpus_pinned:
        raise exception.InstanceUnacceptable(
            reason="Hyper-V cannot guarantee the CPU pinning.",
            instance_id=instance.uuid)

    # validate that the requested NUMA topology is not asymmetric.
    # e.g.: it should be like: (X cpus, X cpus, Y cpus), where X == Y.
    # same with memory.
    for cell in instance_topology.cells:
        if (len(cell.cpuset) != cpus_per_numa_node or
                cell.memory != memory_per_numa_node):
            raise exception.InstanceUnacceptable(
                reason="Hyper-V cannot guarantee the given instance NUMA "
                       "topology.",
                instance_id=instance.uuid)

    return memory_per_numa_node, cpus_per_numa_node
def numa_topology(self):
    image_meta = objects.ImageMeta.from_dict(self.image_meta)
    return hardware.numa_get_constraints(
        self.instance_type, image_meta)
def _update_usage_from_migration(self, context, instance, image_meta,
                                 resources, migration):
    """Update usage for a single migration.

    The record may represent an incoming or outbound migration.
    """
    uuid = migration['instance_uuid']
    LOG.audit(_("Updating from migration %s") % uuid)

    incoming = (migration['dest_compute'] == self.host and
                migration['dest_node'] == self.nodename)
    outbound = (migration['source_compute'] == self.host and
                migration['source_node'] == self.nodename)
    same_node = (incoming and outbound)

    record = self.tracked_instances.get(uuid, None)
    itype = None

    if same_node:
        # same node resize. record usage for whichever instance type the
        # instance is *not* in:
        if (instance['instance_type_id'] ==
                migration['old_instance_type_id']):
            itype = self._get_instance_type(
                context, instance, 'new_',
                migration['new_instance_type_id'])
        else:
            # instance record already has new flavor, hold space for a
            # possible revert to the old instance type:
            itype = self._get_instance_type(
                context, instance, 'old_',
                migration['old_instance_type_id'])
    elif incoming and not record:
        # instance has not yet migrated here:
        itype = self._get_instance_type(context, instance, 'new_',
                                        migration['new_instance_type_id'])
    elif outbound and not record:
        # instance migrated, but record usage for a possible revert:
        itype = self._get_instance_type(context, instance, 'old_',
                                        migration['old_instance_type_id'])

    if image_meta is None:
        image_meta = utils.get_image_from_system_metadata(
            instance['system_metadata'])

    if itype:
        host_topology = resources.get('numa_topology')
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
        numa_topology = hardware.numa_get_constraints(itype, image_meta)
        numa_topology = hardware.numa_fit_instance_to_host(
            host_topology, numa_topology)
        usage = self._get_usage_dict(
            itype, numa_topology=numa_topology)
        if self.pci_tracker:
            self.pci_tracker.update_pci_for_migration(context, instance)
        self._update_usage(context, resources, usage)
        if self.pci_tracker:
            resources['pci_stats'] = jsonutils.dumps(
                self.pci_tracker.stats)
        else:
            resources['pci_stats'] = jsonutils.dumps([])
        self.tracked_migrations[uuid] = (migration, itype)
def select_destinations(self, ctxt, request_spec=None,
                        filter_properties=None, spec_obj=_sentinel,
                        instance_uuids=None):
    """Returns destination(s) best suited for this RequestSpec.

    The result should be a list of dicts with 'host', 'nodename' and
    'limits' as keys.
    """
    LOG.debug("Starting to schedule for instances: %s", instance_uuids)

    # TODO(sbauza): Change the method signature to only accept a spec_obj
    # argument once API v5 is provided.
    if spec_obj is self._sentinel:
        spec_obj = objects.RequestSpec.from_primitives(
            ctxt, request_spec, filter_properties)
    resources = utils.resources_from_request_spec(spec_obj)

    # WRS: Determine resources consumed for the placement candidate check.
    vcpus = spec_obj.flavor.vcpus
    extra_specs = spec_obj.flavor.extra_specs
    image_props = spec_obj.image.properties

    # WRS: The request_spec has stale numa_topology, so it must be
    # updated. We can get a stale numa_topology if we do an evacuation or
    # live-migration after a resize.
    instance_type = spec_obj.flavor
    image_meta = objects.ImageMeta(properties=image_props)
    try:
        spec_obj.numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)
    except Exception as ex:
        LOG.error("Cannot get numa constraints, error=%(err)r",
                  {'err': ex})

    instance_numa_topology = spec_obj.numa_topology
    # WRS: If cpu_thread_policy is ISOLATE and the compute has
    # hyperthreading enabled, the vcpus claim will be double
    # flavor.vcpus. Since we don't know the compute node at this point,
    # we'll just request flavor.vcpus and let the numa_topology filter
    # sort this out.
    numa_cell = objects.NUMACell(siblings=[])
    numa_topology = objects.NUMATopology(cells=[numa_cell])._to_json()
    computenode = objects.ComputeNode(numa_topology=numa_topology)
    normalized_resources = utils.normalized_resources_for_placement_claim(
        resources, computenode, vcpus, extra_specs, image_props,
        instance_numa_topology)

    alloc_reqs_by_rp_uuid, provider_summaries = None, None
    if self.driver.USES_ALLOCATION_CANDIDATES:
        res = self.placement_client.get_allocation_candidates(
            normalized_resources)
        if res is None:
            # We have to handle the case that we failed to connect to the
            # Placement service and the safe_connect decorator on
            # get_allocation_candidates returns None.
            alloc_reqs, provider_summaries = None, None
        else:
            alloc_reqs, provider_summaries = res
        if not alloc_reqs:
            LOG.debug("Got no allocation candidates from the Placement "
                      "API. This may be a temporary occurrence as compute "
                      "nodes start up and begin reporting inventory to "
                      "the Placement service.")

            # Determine the rejection reasons for all hosts based on
            # placement vcpu, memory, and disk criteria. This is done
            # after-the-fact since the placement query does not return
            # any reasons.
            reasons = self.placement_client.get_rejection_reasons(
                requested=normalized_resources)
            if reasons is None:
                reasons = {}

            # Populate per-host rejection map based on placement criteria.
            host_states = self.driver.host_manager.get_all_host_states(
                ctxt)
            for host_state in host_states:
                if host_state.uuid in reasons:
                    msg = reasons[host_state.uuid]
                    if msg:
                        nova_utils.filter_reject('Placement', host_state,
                                                 spec_obj, msg,
                                                 append=False)

            reason = 'Placement service found no hosts.'
            filter_properties = spec_obj.to_legacy_filter_properties_dict()
            utils.NoValidHost_extend(filter_properties, reason=reason)
        else:
            # Build a dict of lists of allocation requests, keyed by
            # provider UUID, so that when we attempt to claim resources
            # for a host, we can grab an allocation request easily
            alloc_reqs_by_rp_uuid = collections.defaultdict(list)
            for ar in alloc_reqs:
                for rr in ar['allocations']:
                    rp_uuid = rr['resource_provider']['uuid']
                    alloc_reqs_by_rp_uuid[rp_uuid].append(ar)

    dests = self.driver.select_destinations(ctxt, spec_obj, instance_uuids,
                                            alloc_reqs_by_rp_uuid,
                                            provider_summaries)
    dest_dicts = [_host_state_obj_to_dict(d) for d in dests]
    return jsonutils.to_primitive(dest_dicts)
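# A small, runnable sketch of the grouping step in the `else` branch
# above: allocation requests are bucketed by resource-provider UUID so a
# later claim against a chosen host can fetch a ready-made request
# directly. The request bodies and 'rp-1'/'rp-2' UUIDs are made up for
# illustration, mimicking the shape the code iterates over.
import collections

alloc_reqs = [
    {'allocations': [{'resource_provider': {'uuid': 'rp-1'},
                      'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}]},
    {'allocations': [{'resource_provider': {'uuid': 'rp-2'},
                      'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}]},
]

alloc_reqs_by_rp_uuid = collections.defaultdict(list)
for ar in alloc_reqs:
    for rr in ar['allocations']:
        alloc_reqs_by_rp_uuid[rr['resource_provider']['uuid']].append(ar)

print(len(alloc_reqs_by_rp_uuid['rp-1']))  # 1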
def numa_topology(self):
    return hardware.numa_get_constraints(
        self.instance_type, self.image_meta)
def numa_topology(self):
    return hardware.numa_get_constraints(self.flavor, self.image_meta)
def _update_usage_from_migration(self, context, instance, image_meta,
                                 migration):
    """Update usage for a single migration.

    The record may represent an incoming or outbound migration.
    """
    if not self._is_trackable_migration(migration):
        return

    uuid = migration.instance_uuid
    LOG.info(_LI("Updating from migration %s") % uuid)

    incoming = (migration.dest_compute == self.host and
                migration.dest_node == self.nodename)
    outbound = (migration.source_compute == self.host and
                migration.source_node == self.nodename)
    same_node = (incoming and outbound)

    record = self.tracked_instances.get(uuid, None)
    itype = None

    if same_node:
        # same node resize. record usage for whichever instance type the
        # instance is *not* in:
        if (instance['instance_type_id'] ==
                migration.old_instance_type_id):
            itype = self._get_instance_type(context, instance, 'new_',
                                            migration.new_instance_type_id)
        else:
            # instance record already has new flavor, hold space for a
            # possible revert to the old instance type:
            itype = self._get_instance_type(context, instance, 'old_',
                                            migration.old_instance_type_id)
    elif incoming and not record:
        # instance has not yet migrated here:
        itype = self._get_instance_type(context, instance, 'new_',
                                        migration.new_instance_type_id)
    elif outbound and not record:
        # instance migrated, but record usage for a possible revert:
        itype = self._get_instance_type(context, instance, 'old_',
                                        migration.old_instance_type_id)

    if image_meta is None:
        image_meta = objects.ImageMeta.from_instance(instance)
    # TODO(jaypipes): Remove when image_meta is always passed
    # as an objects.ImageMeta
    elif not isinstance(image_meta, objects.ImageMeta):
        image_meta = objects.ImageMeta.from_dict(image_meta)

    if itype:
        host_topology = self.compute_node.get('numa_topology')
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
        numa_topology = hardware.numa_get_constraints(itype, image_meta)
        numa_topology = hardware.numa_fit_instance_to_host(
            host_topology, numa_topology)
        usage = self._get_usage_dict(
            itype, numa_topology=numa_topology)
        if self.pci_tracker:
            self.pci_tracker.update_pci_for_migration(context, instance)
        self._update_usage(usage)
        if self.pci_tracker:
            obj = self.pci_tracker.stats.to_device_pools_obj()
            self.compute_node.pci_device_pools = obj
        else:
            obj = objects.PciDevicePoolList()
            self.compute_node.pci_device_pools = obj
        self.tracked_migrations[uuid] = (migration, itype)
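# A minimal sketch of the direction test shared by the
# _update_usage_from_migration variants above; `Migration` here is an
# illustrative namedtuple, not the Nova object. A record whose source
# and destination both match this host/node is a same-node resize.
from collections import namedtuple

Migration = namedtuple('Migration', ['source_compute', 'source_node',
                                     'dest_compute', 'dest_node'])

def migration_direction(migration, host, nodename):
    incoming = (migration.dest_compute == host and
                migration.dest_node == nodename)
    outbound = (migration.source_compute == host and
                migration.source_node == nodename)
    if incoming and outbound:
        return 'same-node resize'
    if incoming:
        return 'incoming'
    if outbound:
        return 'outbound'
    return 'unrelated'

m = Migration('compute1', 'node1', 'compute1', 'node1')
print(migration_direction(m, 'compute1', 'node1'))  # same-node resize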
def _schedule(self, context, request_spec, filter_properties):
    """Returns a list of hosts that meet the required specs,
    ordered by their fitness.
    """
    elevated = context.elevated()
    instance_properties = request_spec['instance_properties']
    instance_type = request_spec.get("instance_type", None)
    instance_uuids = request_spec.get("instance_uuids", None)
    LOG.debug("[HINTS] filter_properties=%s" % filter_properties)
    # query scheduler_hints from the database, and skip those already
    # passed in the parameters.
    if instance_uuids:
        inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
            context, instance_uuids[0])
        if inst_extra:
            scheduler_hints = jsonutils.loads(
                inst_extra.scheduler_hints or '{}')
            stats = jsonutils.loads(inst_extra.stats or '{}')
        else:
            scheduler_hints = {}
            stats = {}
        LOG.debug("[HINTS] Got scheduler_hints via db. "
                  "scheduler_hints=%s" % scheduler_hints)
        filter_properties['scheduler_hints'] = scheduler_hints
        filter_properties['stats'] = stats
        instance_properties['stats'] = stats
    try:
        update_group_hosts = self._setup_instance_group(
            context, filter_properties)
    except exception.InstanceGroupNotFound:
        # The InstanceGroup has already been checked in the API and
        # might have been deleted during migrate/HA
        LOG.warning("ServerGroup %s doesn't exist" %
                    scheduler_hints.get('group', "None"))
        update_group_hosts = False
    config_options = self._get_configuration_options()

    filter_properties.update({'context': context,
                              'request_spec': request_spec,
                              'config_options': config_options,
                              'instance_type': instance_type})

    self.populate_filter_properties(request_spec, filter_properties)

    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.

    # Note: remember, we are using an iterator here. So only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.
    hosts = self._get_all_host_states(elevated)

    selected_hosts = []
    if instance_uuids:
        num_instances = len(instance_uuids)
    else:
        num_instances = request_spec.get('num_instances', 1)
    for num in xrange(num_instances):
        # NOTE: add a tracker of filters
        tracker = HuaweiFilterTracker()
        filter_properties['__tracker'] = tracker

        # Filter local hosts based on requirements ...
        hosts = self.host_manager.get_filtered_hosts(hosts,
                                                     filter_properties,
                                                     index=num)
        if not hosts:
            # Can't get any more locally.
            break

        LOG.debug("Filtered %(hosts)s", {'hosts': hosts})

        weighed_hosts = self.host_manager.get_weighed_hosts(
            hosts, filter_properties)

        LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1

        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])

        host_mapper = dict()
        for host in weighed_hosts:
            host_mapper[host.obj.host] = host

        if 'resize_prefer_to_same_host' in filter_properties:
            origin_host = filter_properties['resize_prefer_to_same_host']
            chosen_host = host_mapper.get(origin_host, chosen_host)

        migrate_host = filter_properties.get('migrate_host')
        if migrate_host:
            if migrate_host in host_mapper:
                chosen_host = host_mapper.get(migrate_host)
            else:
                # migrate_host is not in the filtered hosts list,
                # raise NoValidHost
                break

        selected_hosts.append(chosen_host)

        # Now consume the resources so the filter/weights
        # will change for the next instance.

        # NOTE(): adding and deleting pci_requests is a temporary
        # fix to avoid DB access in consume_from_instance() while getting
        # pci_requests. The change can be removed once pci_requests is
        # part of the instance object that is passed into the scheduler
        # APIs
        pci_requests = filter_properties.get('pci_requests')
        if pci_requests:
            instance_properties['pci_requests'] = pci_requests
        if request_spec.get('instance_type'):
            instance_properties['numa_topology'] = \
                hardware.numa_get_constraints(instance_type, {})
        self._update_instance_topology(instance_properties, chosen_host)

        try:
            bind_info, instance_numa, __ = utils.get_inst_cpu_bind_info(
                instance_properties, chosen_host.obj,
                filter_properties=filter_properties)
        except exception.NovaException as ex:
            msg = ("Getting cpu binding info on host %(host)s failed;"
                   " host_numa_top is %(host_numa_top)s,"
                   " instance_properties is %(instance_properties)s")
            params = {'host': chosen_host.obj.host,
                      'host_numa_top': chosen_host.obj.numa_topology,
                      'instance_properties': instance_properties}
            # set bind_info and instance_numa to None
            bind_info = None
            instance_numa = None
            LOG.debug(_LE(msg), params)
            LOG.debug(_LE(ex.format_message()))

        scheduler_hints = filter_properties.get('scheduler_hints', None)
        if instance_numa and instance_numa['cells'][0].get('is_huawei'):
            cells = []
            for cell in instance_numa['cells']:
                cells.append(objects.InstanceNUMACell(
                    id=cell['id'], cpuset=set(cell['cpuset']),
                    memory=cell['mem']['total'],
                    pagesize=cell.get('pagesize')))
            format_inst_numa = objects.InstanceNUMATopology(cells=cells)
            instance_properties['numa_topology'] = format_inst_numa

        try:
            if isinstance(chosen_host.obj,
                          ironic_host_manager.IronicNodeState):
                chosen_host.obj.consume_from_instance(instance_properties)
            else:
                chosen_host.obj.consume_from_instance(instance_properties,
                                                      filter_properties)
        except exception.PciDeviceRequestFailed as e:
            # drop the chosen host in order to roll back its resources
            # in memory
            LOG.warning("consume got exception: %s", e.format_message())
            rollback_hosts = [chosen_host]
            self.host_manager.force_update_host_states(context,
                                                       rollback_hosts)

        if pci_requests:
            del instance_properties['pci_requests']
        if update_group_hosts is True:
            # NOTE(): Group details are serialized into a list now
            # that they are populated by the conductor, we need to
            # deserialize them
            if isinstance(filter_properties['group_hosts'], list):
                filter_properties['group_hosts'] = set(
                    filter_properties['group_hosts'])

    self._check_fulfill_for_multiple_create(context, num_instances,
                                            selected_hosts)

    return selected_hosts
def _validate_numa_node(flavor):
    NUMA_NODES_KEY = 'hw:numa_nodes'
    NUMA_NODE_PREFIX = 'hw:numa_node.'
    specs = flavor.extra_specs
    try:
        hw_numa_nodes = int(specs.get(NUMA_NODES_KEY, 1))
    except ValueError:
        msg = _('hw:numa_nodes value must be an integer')
        raise webob.exc.HTTPBadRequest(explanation=msg)
    if hw_numa_nodes < 1:
        msg = _('hw:numa_nodes value must be greater than 0')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    for key in specs:
        if key.startswith(NUMA_NODE_PREFIX):
            # NUMA pinning not allowed when CPU policy is shared
            if (specs.get(CPU_POLICY_KEY) ==
                    fields.CPUAllocationPolicy.SHARED):
                msg = _('hw:numa_node not permitted when cpu policy '
                        'is set to shared')
                raise webob.exc.HTTPConflict(explanation=msg)
            suffix = key.split(NUMA_NODE_PREFIX, 1)[1]
            try:
                vnode = int(suffix)
            except ValueError:
                msg = _('virtual numa node number must be an integer')
                raise webob.exc.HTTPBadRequest(explanation=msg)
            if vnode < 0:
                msg = _('virtual numa node number must be greater than '
                        'or equal to 0')
                raise webob.exc.HTTPBadRequest(explanation=msg)
            try:
                pnode = int(specs[key])
            except ValueError:
                msg = _('%s must be an integer') % key
                raise webob.exc.HTTPBadRequest(explanation=msg)
            if pnode < 0:
                msg = _('%s must be greater than or equal to 0') % key
                raise webob.exc.HTTPBadRequest(explanation=msg)
            if pnode >= MAX_HOST_NUMA_NODES:
                msg = (_('%(K)s value %(P)d is not valid. It must '
                         'be an integer from 0 to %(N)d')
                       % {'K': key, 'P': pnode,
                          'N': MAX_HOST_NUMA_NODES - 1})
                raise webob.exc.HTTPBadRequest(explanation=msg)
            if vnode >= hw_numa_nodes:
                msg = _('all hw:numa_node keys must use vnode id less '
                        'than the specified hw:numa_nodes value (%s)') \
                    % hw_numa_nodes
                raise webob.exc.HTTPBadRequest(explanation=msg)

    # CPU scaling doesn't currently support multiple guest NUMA nodes
    if hw_numa_nodes > 1 and CPU_SCALING_KEY in specs:
        msg = _('CPU scaling not supported for instances with'
                ' multiple NUMA nodes.')
        raise webob.exc.HTTPConflict(explanation=msg)

    # CGTS-3716 Asymmetric NUMA topology protection
    # Do common error check from numa_get_constraints with a clearer error
    if hw_numa_nodes > 0 and specs.get('hw:numa_cpus.0') is None:
        if (flavor.vcpus % hw_numa_nodes) > 0:
            msg = _('flavor vcpus not evenly divisible by'
                    ' the specified hw:numa_nodes value (%s)') \
                % hw_numa_nodes
            raise webob.exc.HTTPConflict(explanation=msg)
        if (flavor.memory_mb % hw_numa_nodes) > 0:
            msg = _('flavor memory not evenly divisible by'
                    ' the specified hw:numa_nodes value (%s) so'
                    ' per NUMA-node values must be explicitly specified') \
                % hw_numa_nodes
            raise webob.exc.HTTPConflict(explanation=msg)

    # Catchall test
    try:
        # Check if this modified flavor would be valid assuming
        # no image metadata.
        hardware.numa_get_constraints(flavor, image_meta.ImageMeta(
            properties=image_meta.ImageMetaProps()))
    except Exception as error:
        msg = _('%s') % error.message
        raise webob.exc.HTTPConflict(explanation=msg)
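# Hedged example of flavor extra_specs the validator above would accept
# and reject; the values are made up for illustration. With
# hw:numa_nodes=2, every hw:numa_node.<vnode> key must name a vnode id
# below 2 and map it to a physical node id in [0, MAX_HOST_NUMA_NODES),
# and vcpus/memory_mb must divide evenly across nodes unless per-node
# hw:numa_cpus.0 overrides are given.
good_specs = {
    'hw:numa_nodes': '2',
    'hw:numa_node.0': '0',  # vnode 0 -> physical node 0
    'hw:numa_node.1': '1',  # vnode 1 -> physical node 1
}
bad_specs = {
    'hw:numa_nodes': '2',
    'hw:numa_node.2': '0',  # rejected: vnode id 2 >= hw:numa_nodes (2)
}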
def check_instance_numa(instance):
    flavor = instance.flavor
    numa_topology = hardware.numa_get_constraints(flavor,
                                                  instance.image_meta)
    if numa_topology:
        return True
    return False
def build_request_spec(ctxt, image, instances, instance_type=None):
    """Build a request_spec for the scheduler.

    The request_spec assumes that all instances to be scheduled are the
    same type.
    """
    instance = instances[0]
    if instance_type is None:
        if isinstance(instance, obj_instance.Instance):
            instance_type = instance.get_flavor()
        else:
            instance_type = flavors.extract_flavor(instance)

    # WRS: The request_spec requires an updated requested numa_topology,
    # otherwise we use the numa_topology from when the instance was first
    # created. The required numa topology changes when we do a resize.
    requested_topology = None
    if isinstance(instance, obj_instance.Instance):
        instance = obj_base.obj_to_primitive(instance)
        # obj_to_primitive doesn't copy this enough, so be sure
        # to detach our metadata blob because we modify it below.
        instance['system_metadata'] = dict(
            instance.get('system_metadata', {}))

    if isinstance(instance_type, objects.Flavor):
        if isinstance(image, dict) and 'properties' in image:
            image_meta = objects.ImageMeta.from_dict(image)
        else:
            image_meta = objects.ImageMeta.from_dict(
                utils.get_image_from_system_metadata(
                    instance['system_metadata']))
        try:
            requested_topology = hardware.numa_get_constraints(
                instance_type, image_meta)
            instance['numa_topology'] = requested_topology
        except Exception as ex:
            LOG.error("Cannot get numa constraints, error=%(err)r",
                      {'err': ex})

    if isinstance(instance_type, objects.Flavor):
        instance_type = obj_base.obj_to_primitive(instance_type)
        # NOTE(danms): Replicate this old behavior because the
        # scheduler RPC interface technically expects it to be
        # there. Remove this when we bump the scheduler RPC API to
        # v5.0
        try:
            flavors.save_flavor_info(
                instance.get('system_metadata', {}), instance_type)
        except KeyError:
            # If the flavor isn't complete (which is legit with a
            # flavor object), just don't put it in the request spec
            pass

    request_spec = {
        'image': image or {},
        'instance_properties': instance,
        'instance_type': instance_type,
        'num_instances': len(instances)}
    # WRS: Update requested numa topology, needed for resize.
    if requested_topology is not None:
        request_spec.update({'numa_topology': requested_topology})
    return jsonutils.to_primitive(request_spec)
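# Sketch of the dict shape build_request_spec returns (before
# jsonutils.to_primitive); every value below is made up for
# illustration, and 'numa_topology' appears only when
# numa_get_constraints produced a topology.
request_spec = {
    'image': {'id': 'image-uuid', 'properties': {'hw_numa_nodes': '1'}},
    'instance_properties': {'uuid': 'instance-uuid',
                            'system_metadata': {}},  # primitive instance
    'instance_type': {'vcpus': 2, 'memory_mb': 2048,
                      'extra_specs': {}},            # primitive flavor
    'num_instances': 1,
}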