def _test_numa_topology(self, resources, limit):
    network_info = objects.InstanceInfoCache.get_by_instance_uuid(
        self.context, self.instance['uuid']).network_info
    self.instance['numa_topology'] = self.numa_topology or {}
    self.instance['vcpus'] = self.instance_type['vcpus']
    self.instance['memory_mb'] = self.memory_mb
    # Check whether the requested CPUs and memory fit on the host.
    bind_info, instance_numa, _ = sched_utils.get_inst_cpu_bind_info(
        self.instance, self.tracker.host, network_info=network_info,
        action="resize")
    if instance_numa and instance_numa['cells'][0].get('mem'):
        cells = []
        for cell in instance_numa['cells']:
            cells.append(objects.InstanceNUMACell(
                id=cell['id'], cpuset=set(cell['cpuset']),
                memory=cell['mem']['total'],
                pagesize=cell.get('pagesize')))
        format_inst_numa = objects.InstanceNUMATopology(
            cells=cells, instance_uuid=self.instance['uuid'])
        self.claimed_numa_topology = format_inst_numa
        self.bind_info = bind_info
        self.instance['numa_topology'] = format_inst_numa
    elif not instance_numa:
        return
    else:
        self.claimed_numa_topology = instance_numa
        self.bind_info = bind_info
        self.instance['numa_topology'] = instance_numa
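# ---------------------------------------------------------------------
# The claim paths in this file all repeat one conversion: the scheduler
# hands back NUMA cells as plain dicts, and the tracker rebuilds typed
# InstanceNUMACell/InstanceNUMATopology objects from them. Below is a
# minimal, self-contained sketch of that pattern; SketchCell is a
# hypothetical stand-in for objects.InstanceNUMACell, not a Nova class.
import collections

SketchCell = collections.namedtuple(
    'SketchCell', ['id', 'cpuset', 'memory', 'pagesize'])


def cells_from_sched_dict(instance_numa):
    # Mirrors the loop used in _test_numa_topology() above.
    return [SketchCell(id=cell['id'],
                       cpuset=set(cell['cpuset']),
                       memory=cell['mem']['total'],
                       pagesize=cell.get('pagesize'))
            for cell in instance_numa['cells']]


# Example payload in the shape get_inst_cpu_bind_info() returns above:
_example = {'cells': [{'id': 0, 'cpuset': [0, 1], 'mem': {'total': 1024}}]}
assert cells_from_sched_dict(_example)[0].memory == 1024
# ---------------------------------------------------------------------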
def get_cpu_at_live_migration(self, context, instance, network_info,
                              block_migration, migrate_data):
    # Verify that the destination has enough free memory first.
    self.test_mem_in_resource_tracker(context, instance)
    bind_info, instance_numa, _ = sched_utils.get_inst_cpu_bind_info(
        instance, self.host, network_info=network_info)
    if instance_numa and instance_numa['cells'][0].get('mem'):
        numa_id = instance_numa['cells'][0]['id']
    else:
        numa_id = None
    # The live migration record has already been created; store the
    # chosen CPU binding and NUMA node on it.
    migrate_data['cpu'] = bind_info
    migrate_data['numa'] = numa_id
    self._create_live_migration_record(context, instance,
                                       block_migration, migrate_data)
    if instance_numa and instance_numa['cells'][0].get('mem'):
        cells = []
        for cell in instance_numa['cells']:
            cells.append(objects.InstanceNUMACell(
                id=cell['id'], cpuset=set(cell['cpuset']),
                memory=cell['mem']['total'],
                pagesize=cell.get('pagesize')))
        format_inst_numa = objects.InstanceNUMATopology(
            cells=cells, instance_uuid=instance['uuid'])
        sys_meta = instance.system_metadata
        sys_meta['new_numa_topo'] = jsonutils.dumps(format_inst_numa)
        sys_meta['new_bind_info'] = jsonutils.dumps(bind_info)
        instance.system_metadata = sys_meta
        instance.save()
        instance.numa_topology = format_inst_numa
        # Trigger a resource usage update.
        self._update_usage_from_instance(context, self.compute_node,
                                         instance)
        # Persist the changes to the compute node if necessary.
        elevated = context.elevated()
        self._update(elevated, self.compute_node)
    return migrate_data
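# ---------------------------------------------------------------------
# get_cpu_at_live_migration() stashes the new topology and binding in
# instance.system_metadata as JSON so they survive until the other side
# of the migration picks them up. A minimal sketch of that round-trip
# with the stdlib json module (oslo's jsonutils wraps it); the key name
# matches the code above, the payload is illustrative only.
import json

sys_meta = {}
new_bind_info = {0: [2, 3]}  # hypothetical vcpu -> pcpu binding
sys_meta['new_bind_info'] = json.dumps(new_bind_info)

# Later the binding is recovered from the metadata with:
recovered = json.loads(sys_meta['new_bind_info'])
assert recovered == {'0': [2, 3]}  # note: JSON keys come back as strings
# ---------------------------------------------------------------------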
def _test_core_bind(self, context, instance, resource_tracker):
    LOG.debug("get instance cpu bind info in _test_core_bind")
    filter_properties = {}
    inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
        context, instance.uuid)
    if inst_extra:
        scheduler_hints = jsonutils.loads(
            inst_extra.scheduler_hints or '{}')
        stats = jsonutils.loads(inst_extra.stats or '{}')
    else:
        scheduler_hints = {}
        stats = {}
    filter_properties['scheduler_hints'] = scheduler_hints
    filter_properties['stats'] = stats
    pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
        context, instance['uuid'])
    if pci_requests:
        filter_properties['pci_requests'] = pci_requests
    bind_info, instance_numa, enable_ht = (
        sched_utils.get_inst_cpu_bind_info(
            instance, resource_tracker.host,
            filter_properties=filter_properties))
    sched_utils.update_cpu_bind_info_to_db(bind_info, instance.uuid,
                                           instance_numa)
    if instance_numa and instance_numa['cells'][0].get('is_huawei'):
        cells = []
        for cell in instance_numa['cells']:
            cells.append(objects.InstanceNUMACell(
                id=cell['id'], cpuset=set(cell['cpuset']),
                memory=cell['mem']['total'],
                pagesize=cell.get('pagesize')))
        format_inst_numa = objects.InstanceNUMATopology(cells=cells)
        self.claimed_numa_topology = format_inst_numa
        self.instance['numa_topology'] = format_inst_numa
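# ---------------------------------------------------------------------
# Both _test_core_bind() above and _schedule() below rebuild
# filter_properties from the per-instance extra record, defaulting the
# stored JSON columns to empty dicts. A standalone sketch of that
# defaulting pattern; InstExtra is a hypothetical stand-in for a
# HuaweiInstanceExtra row.
import json


def load_filter_properties(inst_extra):
    # `inst_extra.scheduler_hints or '{}'` guards against both NULL
    # columns and a missing row, exactly as in the code above.
    if inst_extra:
        scheduler_hints = json.loads(inst_extra.scheduler_hints or '{}')
        stats = json.loads(inst_extra.stats or '{}')
    else:
        scheduler_hints = {}
        stats = {}
    return {'scheduler_hints': scheduler_hints, 'stats': stats}


class InstExtra(object):  # hypothetical row object
    scheduler_hints = '{"group": "anti-affinity-group"}'
    stats = None  # NULL column falls back to {}


assert load_filter_properties(InstExtra())['stats'] == {}
assert load_filter_properties(None)['scheduler_hints'] == {}
# ---------------------------------------------------------------------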
def _schedule(self, context, request_spec, filter_properties):
    """Returns a list of hosts that meet the required specs,
    ordered by their fitness.
    """
    elevated = context.elevated()
    instance_properties = request_spec['instance_properties']
    instance_type = request_spec.get("instance_type", None)
    instance_uuids = request_spec.get("instance_uuids", None)
    LOG.debug("[HINTS] filter_properties=%s" % filter_properties)
    # Query scheduler_hints from the database, overriding whatever
    # came in through the parameters.
    if instance_uuids:
        inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
            context, instance_uuids[0])
        if inst_extra:
            scheduler_hints = jsonutils.loads(
                inst_extra.scheduler_hints or '{}')
            stats = jsonutils.loads(inst_extra.stats or '{}')
        else:
            scheduler_hints = {}
            stats = {}
        LOG.debug("[HINTS] Got scheduler_hints via db. "
                  "scheduler_hints=%s" % scheduler_hints)
        filter_properties['scheduler_hints'] = scheduler_hints
        filter_properties['stats'] = stats
        instance_properties['stats'] = stats
    try:
        update_group_hosts = self._setup_instance_group(
            context, filter_properties)
    except exception.InstanceGroupNotFound as e:
        # The instance group was already validated in the API, but it
        # may have been deleted by the time we migrate or fail over.
        LOG.warning("ServerGroup %s doesn't exist" %
                    scheduler_hints.get('group', "None"))
        update_group_hosts = False
    config_options = self._get_configuration_options()
    filter_properties.update({'context': context,
                              'request_spec': request_spec,
                              'config_options': config_options,
                              'instance_type': instance_type})
    self.populate_filter_properties(request_spec, filter_properties)
    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.
    # Note: remember, we are using an iterator here, so only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.
    hosts = self._get_all_host_states(elevated)
    selected_hosts = []
    if instance_uuids:
        num_instances = len(instance_uuids)
    else:
        num_instances = request_spec.get('num_instances', 1)
    for num in xrange(num_instances):
        # NOTE: add a tracker for the filters.
        tracker = HuaweiFilterTracker()
        filter_properties['__tracker'] = tracker
        # Filter local hosts based on requirements...
        hosts = self.host_manager.get_filtered_hosts(
            hosts, filter_properties, index=num)
        if not hosts:
            # Can't get any more locally.
            break
        LOG.debug("Filtered %(hosts)s", {'hosts': hosts})
        weighed_hosts = self.host_manager.get_weighed_hosts(
            hosts, filter_properties)
        LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})
        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1
        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])
        host_mapper = dict()
        for host in weighed_hosts:
            host_mapper[host.obj.host] = host
        if 'resize_prefer_to_same_host' in filter_properties:
            origin_host = filter_properties['resize_prefer_to_same_host']
            chosen_host = host_mapper.get(origin_host, chosen_host)
        migrate_host = filter_properties.get('migrate_host')
        if migrate_host:
            if migrate_host in host_mapper:
                chosen_host = host_mapper.get(migrate_host)
            else:
                # migrate_host is not in the filtered host list; break
                # so the caller raises NoValidHost.
                break
        selected_hosts.append(chosen_host)
        # Now consume the resources so the filter/weights
        # will change for the next instance.
        # NOTE(): adding and deleting pci_requests is a temporary fix
        # to avoid DB access in consume_from_instance() while getting
        # pci_requests. The change can be removed once pci_requests is
        # part of the instance object that is passed into the
        # scheduler APIs.
        pci_requests = filter_properties.get('pci_requests')
        if pci_requests:
            instance_properties['pci_requests'] = pci_requests
        if request_spec.get('instance_type'):
            instance_properties['numa_topology'] = \
                hardware.numa_get_constraints(instance_type, {})
        self._update_instance_topology(instance_properties, chosen_host)
        try:
            bind_info, instance_numa, __ = utils.get_inst_cpu_bind_info(
                instance_properties, chosen_host.obj,
                filter_properties=filter_properties)
        except exception.NovaException as ex:
            msg = ("Get cpu binding info on host %(host)s failed, the"
                   " host_numa_top is %(host_numa_top)s, "
                   "instance_properties is %(instance_properties)s")
            params = {'host': chosen_host.obj.host,
                      'host_numa_top': chosen_host.obj.numa_topology,
                      'instance_properties': instance_properties}
            # Fall back to no binding information.
            bind_info = None
            instance_numa = None
            LOG.debug(_LE(msg), params)
            LOG.debug(_LE(ex.format_message()))
        scheduler_hints = filter_properties.get('scheduler_hints', None)
        if instance_numa and instance_numa['cells'][0].get('is_huawei'):
            cells = []
            for cell in instance_numa['cells']:
                cells.append(objects.InstanceNUMACell(
                    id=cell['id'], cpuset=set(cell['cpuset']),
                    memory=cell['mem']['total'],
                    pagesize=cell.get('pagesize')))
            format_inst_numa = objects.InstanceNUMATopology(cells=cells)
            instance_properties['numa_topology'] = format_inst_numa
        try:
            if isinstance(chosen_host.obj,
                          ironic_host_manager.IronicNodeState):
                chosen_host.obj.consume_from_instance(instance_properties)
            else:
                chosen_host.obj.consume_from_instance(
                    instance_properties, filter_properties)
        except exception.PciDeviceRequestFailed as e:
            # Force-refresh the chosen host in order to roll back the
            # resources consumed in memory.
            LOG.warning("consume get exception: %s", e.format_message())
            rollback_hosts = [chosen_host]
            self.host_manager.force_update_host_states(context,
                                                       rollback_hosts)
        if pci_requests:
            del instance_properties['pci_requests']
        if update_group_hosts is True:
            # NOTE(): Group details are serialized into a list now
            # that they are populated by the conductor; we need to
            # deserialize them.
            if isinstance(filter_properties['group_hosts'], list):
                filter_properties['group_hosts'] = set(
                    filter_properties['group_hosts'])
    self._check_fulfill_for_multiple_create(context, num_instances,
                                            selected_hosts)
    return selected_hosts
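# ---------------------------------------------------------------------
# Host selection in _schedule() clamps scheduler_host_subset_size to
# [1, len(weighed_hosts)] and then picks randomly from the best slice,
# which spreads placements across equally good hosts. A standalone
# sketch of just that clamp-and-choose step (plain strings stand in
# for weighed host objects):
import random


def choose_host(weighed_hosts, subset_size):
    # Same clamping as in _schedule(): never slice past the end of
    # the list, and always keep at least one candidate.
    subset_size = min(subset_size, len(weighed_hosts))
    subset_size = max(subset_size, 1)
    return random.choice(weighed_hosts[0:subset_size])


hosts = ['node-1', 'node-2', 'node-3']  # already sorted by weight
assert choose_host(hosts, 2) in hosts[:2]
assert choose_host(hosts, 10) in hosts   # oversized subset is clamped
# ---------------------------------------------------------------------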