def _execute(self): image = self.request_spec.get("image") self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) hosts = self.scheduler_client.select_destinations(self.context, self.request_spec, self.filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop("context", None) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown, )
def _execute(self): self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) hosts = self.scheduler_client.select_destinations( self.context, self.request_spec) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _execute(self): image = self.request_spec.get('image') self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) # TODO(sbauza): Hydrate here the object until we modify the # scheduler.utils methods to directly use the RequestSpec object spec_obj = objects.RequestSpec.from_primitives( self.context, self.request_spec, self.filter_properties) hosts = self.scheduler_client.select_destinations( self.context, spec_obj) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown)
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id="") if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get("shelved_image_id") # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, "get_image_info", instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _("Unshelve attempted but the image %s " "cannot be found.") % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException(instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, "schedule_instances", instance.uuid): filter_properties = {} scheduler_utils.populate_retry(filter_properties, instance.uuid) request_spec = scheduler_utils.build_request_spec(context, image, [instance]) hosts = self._schedule_instances(context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties(filter_properties, host_state) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node ) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE("Unshelve attempted but vm_state not SHELVED or " "SHELVED_OFFLOADED"), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref image = compute_utils.get_image_metadata( context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec( context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) scheduler_utils.setup_instance_group(context, request_spec, filter_properties) try: scheduler_utils.populate_retry(filter_properties, instance['uuid']) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance['vm_state'] if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor['id'] == instance['instance_type_id']: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec['instance_type'].pop('extra_specs', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': instance['vm_state'], 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback()
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations, clean_shutdown): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) scheduler_utils.populate_retry(filter_properties, instance["uuid"]) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor["id"] == instance["instance_type_id"]: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback() try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop("context", None) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, clean_shutdown=clean_shutdown, ) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {"vm_state": instance.vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback()
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec(context, image, instances) scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList( objects=[objects.NetworkRequest.from_tuple(t) for t in requested_networks]) try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) except Exception as exc: for instance in instances: scheduler_driver.handle_schedule_error(context, exc, instance.uuid, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = quotas_obj.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.populate_retry(filter_properties, instance["uuid"]) hosts = self.scheduler_rpcapi.select_destinations(context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance["vm_state"] if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec) quotas.rollback() msg = _("No valid host found for cold migrate") raise exception.NoValidHost(reason=msg) try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop("context", None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec["instance_type"].pop("extra_specs") (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, ) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {"vm_state": instance["vm_state"], "task_state": None} self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec) quotas.rollback()
def _execute(self): image = self.request_spec.image self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # TODO(sbauza): Remove that RequestSpec rehydratation once # scheduler.utils methods use directly the NovaObject. self.request_spec = objects.RequestSpec.from_components( self.context, self.instance.uuid, image, self.flavor, self.instance.numa_topology, self.instance.pci_requests, legacy_props, None, self.instance.availability_zone) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() hosts = self.scheduler_client.select_destinations( self.context, self.request_spec) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _execute(self): image = self.request_spec.get('image') self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) # TODO(sbauza): Hydrate here the object until we modify the # scheduler.utils methods to directly use the RequestSpec object spec_obj = objects.RequestSpec.from_primitives( self.context, self.request_spec, self.filter_properties) hosts = self.scheduler_client.select_destinations( self.context, spec_obj) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop('context', None) # If an explicit destination host was requested, use it for both the # host and node; otherwise fall back to the instance's current host. destination = self.filter_properties['destination_host_name'] host = node = (destination if destination else self.filter_properties['instance_host_name']) LOG.debug('Selected host %(host)s and node %(node)s for prep_resize', {'host': host, 'node': node}, instance=self.instance) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown)
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) # NOTE(takashin): In the case that the target host is specified, # if the migration is failed, it is not necessary to retry # the cold migration to the same host. So make sure that # reschedule will not occur. if 'host' in self.request_spec.requested_destination: legacy_props.pop('retry', None) self.request_spec.retry = None else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_id(self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. 
selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient available # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % { "instance_uuid": self.instance.uuid }) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) LOG.debug( "Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, migration, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList.from_tuples( requested_networks) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) request_spec = {} try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. request_spec = scheduler_utils.build_request_spec( context, image, instances) scheduler_utils.populate_retry( filter_properties, instances[0].uuid) hosts = self._schedule_instances( context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) try: # If the BuildRequest stays around then instance show/lists # will pull from it rather than the errored instance. self._destroy_build_request(context, instance) except exception.BuildRequestNotFound: pass self._cleanup_allocated_networks( context, instance, requested_networks) return for (instance, host) in six.moves.zip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) # This is populated in scheduler_utils.populate_retry num_attempts = local_filter_props.get('retry', {}).get('num_attempts', 1) if num_attempts <= 1: # If this is a reschedule the instance is already mapped to # this cell and the BuildRequest is already deleted so ignore # the logic below. inst_mapping = self._populate_instance_mapping(context, instance, host) try: self._destroy_build_request(context, instance) except exception.BuildRequestNotFound: # This indicates an instance delete has been requested in # the API. Stop the build, cleanup the instance_mapping and # potentially the block_device_mappings # TODO(alaski): Handle block_device_mapping cleanup if inst_mapping: inst_mapping.destroy() return self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def schedule_run_instance(self, context, request_spec, admin_password, injected_files, requested_networks, is_first_time, filter_properties, legacy_bdm_in_spec): """Provisions instances that need to be scheduled. Applies filters and weighers on request properties to get a list of compute hosts and calls them to spawn instance(s). """ payload = dict(request_spec=request_spec) self.notifier.info(context, 'scheduler.run_instance.start', payload) instance_uuids = request_spec.get('instance_uuids') LOG.info( _("Attempting to build %(num_instances)d instance(s) " "uuids: %(instance_uuids)s"), { 'num_instances': len(instance_uuids), 'instance_uuids': instance_uuids }) LOG.debug("Request Spec: %s" % request_spec) # check retry policy. Rather ugly use of instance_uuids[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instance_uuids[0]) weighed_hosts = self._schedule(context, request_spec, filter_properties) # NOTE: Pop instance_uuids as individual creates do not need the # set of uuids. Do not pop before here as the upper exception # handler for NoValidHost needs the uuid to set error state scheduler_utils.populate_retry(filter_properties, instance_uuids[0]) if False else None instance_uuids = request_spec.pop('instance_uuids') # NOTE(comstud): Make sure we do not pass this through. It # contains an instance of RpcContext that cannot be serialized. filter_properties.pop('context', None) for num, instance_uuid in enumerate(instance_uuids): request_spec['instance_properties']['launch_index'] = num try: try: weighed_host = weighed_hosts.pop(0) LOG.info( _("Choosing host %(weighed_host)s " "for instance %(instance_uuid)s"), { 'weighed_host': weighed_host, 'instance_uuid': instance_uuid }) except IndexError: raise exception.NoValidHost(reason="") self._provision_resource(context, weighed_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=instance_uuid, legacy_bdm_in_spec=legacy_bdm_in_spec) except Exception as ex: # NOTE(vish): we don't reraise the exception here to make sure # that all instances in the request get set to # error properly driver.handle_schedule_error(context, ex, instance_uuid, request_spec) # scrub retry host list in case we're scheduling multiple # instances: retry = filter_properties.get('retry', {}) retry['hosts'] = [] self.notifier.info(context, 'scheduler.run_instance.end', payload)
def _execute(self): self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) hosts = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid]) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _execute(self): # NOTE(sbauza): Force_hosts/nodes needs to be reset if we want to make # sure that the next destination is not forced to be the original host. # This needs to be done before the populate_retry call otherwise # retries will be disabled if the server was created with a forced # host/node. self.request_spec.reset_forced_destinations() # TODO(sbauza): Remove once all the scheduler.utils methods accept a # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) port_res_req = self.network_api.get_requested_resource_for_instance( self.context, self.instance.uuid) # NOTE(gibi): When cyborg or other module wants to handle similar # non-nova resources then here we have to collect all the external # resource requests in a single list and add them to the RequestSpec. self.request_spec.requested_resources = port_res_req self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) self.request_spec.ensure_network_metadata(self.instance) compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection = self._schedule() else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. selection = self._reschedule() scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) LOG.debug( "Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. 
self.compute_rpcapi.prep_resize( # NOTE(mriedem): Using request_spec.image here is potentially # dangerous if it is not kept up to date (i.e. rebuild/unshelve); # seems like the sane thing to do would be to pass the current # instance.image_meta since that is what MoveClaim will use for # any NUMA topology claims on the destination host... self.context, self.instance, self.request_spec.image, self.flavor, host, migration, request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations, clean_shutdown): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) scheduler_utils.populate_retry(filter_properties, instance['uuid']) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor['id'] == instance['instance_type_id']: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback() try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, clean_shutdown=clean_shutdown) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': instance.vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback()
def unshelve_instance(self, context, instance, request_spec=None): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is # old. We need to mock that the old way filter_properties = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) else: # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances(), # populate_filter_properties and populate_retry() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.\ to_legacy_request_spec_dict() scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances(context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec(context, image, instances) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList( objects=[objects.NetworkRequest.from_tuple(t) for t in requested_networks]) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def unshelve_instance(self, context, instance, request_spec=None): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter( context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is # old. We need to mock that the old way filter_properties = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) else: # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances(), # populate_filter_properties and populate_retry() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.\ to_legacy_request_spec_dict() scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances( context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) snapshot_id = sys_meta.get('shelved_image_id') if snapshot_id: self._delete_image(context, snapshot_id) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): filter_properties = {} scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return for key in ['shelved_at', 'shelved_image_id', 'shelved_host']: if key in sys_meta: del (sys_meta[key]) instance.system_metadata = sys_meta instance.save()
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec( context, image, instances) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList(objects=[ objects.NetworkRequest.from_tuple(t) for t in requested_networks ]) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify(context, instance.uuid, 'build_instances', updates, exc, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance( context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _execute(self): # TODO(sbauza): Remove once all the scheduler.utils methods accept a # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # TODO(gibi): We need to make sure that the requested_resources field # is recalculated based on neutron ports. self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) self.request_spec.ensure_network_metadata(self.instance) compute_utils.heal_reqspec_is_bfv( self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.query_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient available # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. 
host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % {"instance_uuid": self.instance.uuid}) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( # NOTE(mriedem): Using request_spec.image here is potentially # dangerous if it is not kept up to date (i.e. rebuild/unshelve); # seems like the sane thing to do would be to pass the current # instance.image_meta since that is what MoveClaim will use for # any NUMA topology claims on the destination host... self.context, self.instance, self.request_spec.image, self.flavor, host, migration, request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations):
    image_ref = instance.image_ref
    image = compute_utils.get_image_metadata(
        context, self.image_api, image_ref, instance)

    request_spec = scheduler_utils.build_request_spec(
        context, image, [instance], instance_type=flavor)

    quotas = objects.Quotas.from_reservations(context,
                                              reservations,
                                              instance=instance)
    try:
        scheduler_utils.populate_retry(filter_properties, instance['uuid'])
        hosts = self.scheduler_client.select_destinations(
            context, request_spec, filter_properties)
        host_state = hosts[0]
    except exception.NoValidHost as ex:
        vm_state = instance['vm_state']
        if not vm_state:
            vm_state = vm_states.ACTIVE
        updates = {'vm_state': vm_state, 'task_state': None}
        self._set_vm_state_and_notify(context, 'migrate_server',
                                      updates, ex, request_spec)
        quotas.rollback()

        # if the flavor IDs match, it's migrate; otherwise resize
        if flavor['id'] == instance['instance_type_id']:
            msg = _("No valid host found for cold migrate")
        else:
            msg = _("No valid host found for resize")
        raise exception.NoValidHost(reason=msg)

    try:
        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)
        # context is not serializable
        filter_properties.pop('context', None)

        # TODO(timello): originally, instance_type in request_spec
        # on compute.api.resize does not have 'extra_specs', so we
        # remove it for now to keep tests backwards compatible.
        request_spec['instance_type'].pop('extra_specs')

        (host, node) = (host_state['host'], host_state['nodename'])
        self.compute_rpcapi.prep_resize(
            context, image, instance, flavor, host, reservations,
            request_spec=request_spec,
            filter_properties=filter_properties,
            node=node)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance['vm_state'],
                       'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            quotas.rollback()
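The NoValidHost handler above picks its error message solely by comparing flavor IDs: a cold migrate keeps the instance's current flavor, while a resize requests a different one. A tiny illustrative stand-in (hypothetical helper, not Nova code):

def no_valid_host_reason(requested_flavor_id, current_flavor_id):
    # Same flavor means the operation was a cold migrate; a different
    # flavor means the user asked for a resize.
    if requested_flavor_id == current_flavor_id:
        return "No valid host found for cold migrate"
    return "No valid host found for resize"


print(no_valid_host_reason(1, 1))  # cold migrate case
print(no_valid_host_reason(2, 1))  # resize case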
def _execute(self):
    # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
    # object in the signature and all the scheduler.utils methods too
    legacy_spec = self.request_spec.to_legacy_request_spec_dict()
    legacy_props = self.request_spec.to_legacy_filter_properties_dict()
    scheduler_utils.setup_instance_group(self.context, self.request_spec)
    # If a target host is set in a requested destination,
    # 'populate_retry' need not be executed.
    if not ('requested_destination' in self.request_spec and
            self.request_spec.requested_destination and
            'host' in self.request_spec.requested_destination):
        scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

    # NOTE(sbauza): Force_hosts/nodes needs to be reset
    # if we want to make sure that the next destination
    # is not forced to be the original host
    self.request_spec.reset_forced_destinations()

    # NOTE(danms): Right now we only support migrate to the same
    # cell as the current instance, so request that the scheduler
    # limit thusly.
    instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
        self.context, self.instance.uuid)
    LOG.debug('Requesting cell %(cell)s while migrating',
              {'cell': instance_mapping.cell_mapping.identity},
              instance=self.instance)
    if ('requested_destination' in self.request_spec and
            self.request_spec.requested_destination):
        self.request_spec.requested_destination.cell = (
            instance_mapping.cell_mapping)
        # NOTE(takashin): In the case that the target host is specified,
        # if the migration fails, it is not necessary to retry
        # the cold migration to the same host. So make sure that
        # reschedule will not occur.
        if 'host' in self.request_spec.requested_destination:
            legacy_props.pop('retry', None)
            self.request_spec.retry = None
    else:
        self.request_spec.requested_destination = objects.Destination(
            cell=instance_mapping.cell_mapping)

    # Once _preallocate_migration() is done, the source node allocation is
    # moved from the instance consumer to the migration record consumer,
    # and the instance consumer doesn't have any allocations. If this is
    # the first time through here (not a reschedule), select_destinations
    # below will allocate resources on the selected destination node for
    # the instance consumer. If we're rescheduling, host_list is not None
    # and we'll call claim_resources for the instance and the selected
    # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
    # the rollback() method should revert the allocation swaparoo and move
    # the source node allocation from the migration record back to the
    # instance record.
    migration = self._preallocate_migration()

    self.request_spec.ensure_project_and_user_id(self.instance)
    # On an initial call to migrate, 'self.host_list' will be None, so we
    # have to call the scheduler to get a list of acceptable hosts to
    # migrate to. That list will consist of a selected host, along with
    # zero or more alternates. On a reschedule, though, the alternates will
    # be passed to this object and stored in 'self.host_list', so we can
    # pop the first alternate from the list to use for the destination, and
    # pass the remaining alternates to the compute.
    if self.host_list is None:
        selection_lists = self.scheduler_client.select_destinations(
            self.context, self.request_spec, [self.instance.uuid],
            return_objects=True, return_alternates=True)
        # Since there is only ever one instance to migrate per call, we
        # just need the first returned element.
        selection_list = selection_lists[0]
        # The selected host is the first item in the list, with the
        # alternates being the remainder of the list.
        selection, self.host_list = selection_list[0], selection_list[1:]
    else:
        # This is a reschedule that will use the supplied alternate hosts
        # in the host_list as destinations. Since the resources on these
        # alternates may have been consumed and might not be able to
        # support the migrated instance, we need to first claim the
        # resources to verify the host still has sufficient available
        # resources.
        elevated = self.context.elevated()
        host_available = False
        while self.host_list and not host_available:
            selection = self.host_list.pop(0)
            if selection.allocation_request:
                alloc_req = jsonutils.loads(selection.allocation_request)
            else:
                alloc_req = None
            if alloc_req:
                # If this call succeeds, the resources on the destination
                # host will be claimed by the instance.
                host_available = scheduler_utils.claim_resources(
                    elevated, self.reportclient, self.request_spec,
                    self.instance.uuid, alloc_req,
                    selection.allocation_request_version)
            else:
                # Some deployments use different schedulers that do not
                # use Placement, so they will not have an
                # allocation_request to claim with. For those cases,
                # there is no concept of claiming, so just assume that
                # the host is valid.
                host_available = True
        # There are no more available hosts. Raise a MaxRetriesExceeded
        # exception in that case.
        if not host_available:
            reason = ("Exhausted all hosts available for retrying build "
                      "failures for instance %(instance_uuid)s." %
                      {"instance_uuid": self.instance.uuid})
            raise exception.MaxRetriesExceeded(reason=reason)

    scheduler_utils.populate_filter_properties(legacy_props, selection)
    # context is not serializable
    legacy_props.pop('context', None)

    (host, node) = (selection.service_host, selection.nodename)

    self.instance.availability_zone = (
        availability_zones.get_host_availability_zone(
            self.context, host))

    # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
    # oslo.messaging #1529084 to transform datetime values into strings.
    # tl;dr: datetimes in dicts are not accepted as correct values by the
    # rpc fake driver.
    legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

    LOG.debug("Calling prep_resize with selected host: %s; "
              "Selected node: %s; Alternates: %s", host, node,
              self.host_list, instance=self.instance)
    # RPC cast to the destination host to start the migration process.
    self.compute_rpcapi.prep_resize(
        self.context, self.instance, legacy_spec['image'],
        self.flavor, host, migration,
        request_spec=legacy_spec, filter_properties=legacy_props,
        node=node, clean_shutdown=self.clean_shutdown,
        host_list=self.host_list)
def build_instances(self, context, instances, image, filter_properties,
                    admin_password, injected_files, requested_networks,
                    security_groups, block_device_mapping=None,
                    legacy_bdm=True):
    # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
    #                 2.0 of the RPC API.
    request_spec = scheduler_utils.build_request_spec(
        context, image, instances)

    # TODO(danms): Remove this in version 2.0 of the RPC API
    if (requested_networks and
            not isinstance(requested_networks,
                           objects.NetworkRequestList)):
        requested_networks = objects.NetworkRequestList(objects=[
            objects.NetworkRequest.from_tuple(t)
            for t in requested_networks])

    try:
        # check retry policy. Rather ugly use of instances[0]...
        # but if we've exceeded max retries... then we really only
        # have a single instance.
        scheduler_utils.populate_retry(filter_properties,
                                       instances[0].uuid)
        hosts = self.scheduler_client.select_destinations(
            context, request_spec, filter_properties)
    except Exception as exc:
        for instance in instances:
            scheduler_driver.handle_schedule_error(context, exc,
                                                   instance.uuid,
                                                   request_spec)
        return

    for (instance, host) in itertools.izip(instances, hosts):
        try:
            instance.refresh()
        except (exception.InstanceNotFound,
                exception.InstanceInfoCacheNotFound):
            LOG.debug('Instance deleted during build', instance=instance)
            continue
        local_filter_props = copy.deepcopy(filter_properties)
        scheduler_utils.populate_filter_properties(local_filter_props,
                                                   host)
        # The block_device_mapping passed from the api doesn't contain
        # instance specific information
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)

        self.compute_rpcapi.build_and_run_instance(
            context, instance=instance, host=host['host'], image=image,
            request_spec=request_spec,
            filter_properties=local_filter_props,
            admin_password=admin_password,
            injected_files=injected_files,
            requested_networks=requested_networks,
            security_groups=security_groups,
            block_device_mapping=bdms, node=host['nodename'],
            limits=host['limits'])
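build_instances deep-copies filter_properties per instance before recording host-specific data, so one instance's retry history does not leak into another's. A minimal illustration of why the copy matters, using plain dicts and a hypothetical populate_host_for_retry helper rather than Nova's objects:

import copy


def populate_host_for_retry(props, host):
    # Hypothetical helper mimicking how a chosen host gets recorded in the
    # retry info; not Nova's populate_filter_properties.
    props.setdefault('retry', {'num_attempts': 1, 'hosts': []})
    props['retry']['hosts'].append(host)


filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
for host in ['hostA', 'hostB']:
    local_props = copy.deepcopy(filter_properties)
    populate_host_for_retry(local_props, host)
    print(local_props['retry']['hosts'])
# Prints ['hostA'] then ['hostB']; without the deepcopy both instances
# would append into the same shared list.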
def schedule_run_instance(self, context, request_spec,
                          admin_password, injected_files,
                          requested_networks, is_first_time,
                          filter_properties, legacy_bdm_in_spec):
    """Provisions instances that need to be scheduled.

    Applies filters and weighers on request properties to get a list
    of compute hosts and calls them to spawn instance(s).
    """
    payload = dict(request_spec=request_spec)
    self.notifier.info(context, 'scheduler.run_instance.start', payload)

    instance_uuids = request_spec.get('instance_uuids')
    LOG.info(_("Attempting to build %(num_instances)d instance(s) "
               "uuids: %(instance_uuids)s"),
             {'num_instances': len(instance_uuids),
              'instance_uuids': instance_uuids})
    LOG.debug("Request Spec: %s" % request_spec)

    # check retry policy. Rather ugly use of instance_uuids[0]...
    # but if we've exceeded max retries... then we really only
    # have a single instance.
    scheduler_utils.populate_retry(filter_properties, instance_uuids[0])
    weighed_hosts = self._schedule(context, request_spec,
                                   filter_properties)

    # NOTE: Pop instance_uuids as individual creates do not need the
    # set of uuids. Do not pop before here as the upper exception
    # handler for NoValidHost needs the uuid to set error state
    instance_uuids = request_spec.pop('instance_uuids')

    # NOTE(comstud): Make sure we do not pass this through. It
    # contains an instance of RpcContext that cannot be serialized.
    filter_properties.pop('context', None)

    for num, instance_uuid in enumerate(instance_uuids):
        request_spec['instance_properties']['launch_index'] = num

        try:
            try:
                weighed_host = weighed_hosts.pop(0)
                LOG.info(_("Choosing host %(weighed_host)s "
                           "for instance %(instance_uuid)s"),
                         {'weighed_host': weighed_host,
                          'instance_uuid': instance_uuid})
            except IndexError:
                raise exception.NoValidHost(reason="")

            self._provision_resource(context, weighed_host,
                                     request_spec,
                                     filter_properties,
                                     requested_networks,
                                     injected_files, admin_password,
                                     is_first_time,
                                     instance_uuid=instance_uuid,
                                     legacy_bdm_in_spec=legacy_bdm_in_spec)
        except Exception as ex:
            # NOTE(vish): we don't reraise the exception here to make sure
            #             that all instances in the request get set to
            #             error properly
            driver.handle_schedule_error(context, ex, instance_uuid,
                                         request_spec)
        # scrub retry host list in case we're scheduling multiple
        # instances:
        retry = filter_properties.get('retry', {})
        retry['hosts'] = []

    self.notifier.info(context, 'scheduler.run_instance.end', payload)
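The retry handling above (populate_retry at the top and scrubbing retry['hosts'] between instances) is just a small amount of bookkeeping inside filter_properties. A hedged, simplified sketch of that bookkeeping; the real scheduler_utils.populate_retry reads the configured max attempts and raises a Nova-specific exception, whereas this stand-in only shows the shape of the data:

# Simplified sketch of retry bookkeeping in filter_properties (assumed
# MAX_ATTEMPTS constant; not the real scheduler_utils code).
MAX_ATTEMPTS = 3


def populate_retry(filter_properties, instance_uuid):
    retry = filter_properties.setdefault(
        'retry', {'num_attempts': 0, 'hosts': []})
    retry['num_attempts'] += 1
    if retry['num_attempts'] > MAX_ATTEMPTS:
        raise RuntimeError('Exceeded max scheduling attempts for instance %s'
                           % instance_uuid)


props = {}
populate_retry(props, 'fake-uuid')
print(props['retry'])  # {'num_attempts': 1, 'hosts': []}
# Between instances of a multi-create, the per-host history is scrubbed so
# one instance's failed hosts do not poison the next one's retries:
props['retry']['hosts'] = []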
def build_instances(self, context, instances, image, filter_properties,
                    admin_password, injected_files, requested_networks,
                    security_groups, block_device_mapping=None,
                    legacy_bdm=True):
    # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
    #                 2.0 of the RPC API.
    request_spec = scheduler_utils.build_request_spec(
        context, image, instances)

    # NOTE(sbauza): filter_properties['hints'] can be None
    hints = filter_properties.get('scheduler_hints', {}) or {}
    group_hint = hints.get('group')
    group_hosts = filter_properties.get('group_hosts')
    group_info = scheduler_utils.setup_instance_group(
        context, group_hint, group_hosts)
    if isinstance(group_info, tuple):
        filter_properties['group_updated'] = True
        (filter_properties['group_hosts'],
         filter_properties['group_policies']) = group_info

    # TODO(danms): Remove this in version 2.0 of the RPC API
    if (requested_networks and
            not isinstance(requested_networks,
                           objects.NetworkRequestList)):
        requested_networks = objects.NetworkRequestList(objects=[
            objects.NetworkRequest.from_tuple(t)
            for t in requested_networks])

    try:
        # check retry policy. Rather ugly use of instances[0]...
        # but if we've exceeded max retries... then we really only
        # have a single instance.
        # (luzhq) Validate the retry policy: update the retry attributes in
        # filter_properties, and if this is a rescheduled build, also check
        # whether the current retry count exceeds the maximum. Note that
        # instances[0] is used here because on a retry only a single
        # instance is being rescheduled.
        scheduler_utils.populate_retry(filter_properties,
                                       instances[0].uuid)
        hosts = self.scheduler_client.select_destinations(
            context, request_spec, filter_properties)
    except Exception as exc:
        for instance in instances:
            scheduler_driver.handle_schedule_error(context, exc,
                                                   instance.uuid,
                                                   request_spec)
        return

    for (instance, host) in itertools.izip(instances, hosts):
        try:
            instance.refresh()
        except (exception.InstanceNotFound,
                exception.InstanceInfoCacheNotFound):
            LOG.debug('Instance deleted during build', instance=instance)
            continue
        local_filter_props = copy.deepcopy(filter_properties)
        scheduler_utils.populate_filter_properties(local_filter_props,
                                                   host)
        # The block_device_mapping passed from the api doesn't contain
        # instance specific information
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)

        # (luzhq) self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_rpcapi.build_and_run_instance(
            context, instance=instance, host=host['host'], image=image,
            request_spec=request_spec,
            filter_properties=local_filter_props,
            admin_password=admin_password,
            injected_files=injected_files,
            requested_networks=requested_networks,
            security_groups=security_groups,
            block_device_mapping=bdms, node=host['nodename'],
            limits=host['limits'])