def _test_populate_filter_props(self, host_state_obj=True, with_retry=True): if with_retry: filter_properties = dict(retry=dict(hosts=[])) else: filter_properties = dict() if host_state_obj: class host_state(object): host = 'fake-host' nodename = 'fake-node' limits = 'fake-limits' else: host_state = dict(host='fake-host', nodename='fake-node', limits='fake-limits') scheduler_utils.populate_filter_properties(filter_properties, host_state) if with_retry: # So we can check for 2 hosts scheduler_utils.populate_filter_properties(filter_properties, host_state) self.assertEqual('fake-limits', filter_properties['limits']) if with_retry: self.assertEqual([['fake-host', 'fake-node'], ['fake-host', 'fake-node']], filter_properties['retry']['hosts']) else: self.assertNotIn('retry', filter_properties)
def schedule_prep_resize(self, context, image, request_spec, filter_properties, instance, instance_type, reservations): """Select a target for resize. Selects a target host for the instance, post-resize, and casts the prep_resize operation to it. """ weighed_hosts = self._schedule(context, request_spec, filter_properties, [instance['uuid']]) if not weighed_hosts: raise exception.NoValidHost(reason="") weighed_host = weighed_hosts.pop(0) scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj) # context is not serializable filter_properties.pop('context', None) # Forward off to the host self.compute_rpcapi.prep_resize(context, image, instance, instance_type, weighed_host.obj.host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=weighed_host.obj.nodename)
def _execute(self): self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) hosts = self.scheduler_client.select_destinations( self.context, self.request_spec) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _provision_resource(self, context, weighed_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=None, legacy_bdm_in_spec=True): """Create the requested resource in this Zone.""" # NOTE(vish): add our current instance back into the request spec request_spec['instance_uuids'] = [instance_uuid] payload = dict(request_spec=request_spec, weighted_host=weighed_host.to_dict(), instance_id=instance_uuid) self.notifier.info(context, 'scheduler.run_instance.scheduled', payload) # Update the metadata if necessary try: updated_instance = driver.instance_update_db(context, instance_uuid) except exception.InstanceNotFound: LOG.warning(_("Instance disappeared during scheduling"), context=context, instance_uuid=instance_uuid) else: scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj) self.compute_rpcapi.run_instance(context, instance=updated_instance, host=weighed_host.obj.host, request_spec=request_spec, filter_properties=filter_properties, requested_networks=requested_networks, injected_files=injected_files, admin_password=admin_password, is_first_time=is_first_time, node=weighed_host.obj.nodename, legacy_bdm_in_spec=legacy_bdm_in_spec)
def _execute(self): image = self.request_spec.get("image") self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) hosts = self.scheduler_client.select_destinations(self.context, self.request_spec, self.filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop("context", None) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown, )
def _provision_resource(self, context, weighed_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=None): """Create the requested resource in this Zone.""" # NOTE(vish): add our current instance back into the request spec request_spec['instance_uuids'] = [instance_uuid] payload = dict(request_spec=request_spec, weighted_host=weighed_host.to_dict(), instance_id=instance_uuid) notifier.notify(context, notifier.publisher_id("scheduler"), 'scheduler.run_instance.scheduled', notifier.INFO, payload) # Update the metadata if necessary scheduler_hints = filter_properties.get('scheduler_hints') or {} group = scheduler_hints.get('group', None) values = None if group: values = request_spec['instance_properties']['system_metadata'] values.update({'group': group}) values = {'system_metadata': values} updated_instance = driver.instance_update_db(context, instance_uuid, extra_values=values) scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj) self.compute_rpcapi.run_instance(context, instance=updated_instance, host=weighed_host.obj.host, request_spec=request_spec, filter_properties=filter_properties, requested_networks=requested_networks, injected_files=injected_files, admin_password=admin_password, is_first_time=is_first_time, node=weighed_host.obj.nodename)
def _execute(self): image = self.request_spec.get('image') self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) # TODO(sbauza): Hydrate here the object until we modify the # scheduler.utils methods to directly use the RequestSpec object spec_obj = objects.RequestSpec.from_primitives( self.context, self.request_spec, self.filter_properties) hosts = self.scheduler_client.select_destinations( self.context, spec_obj) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown)
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id="") if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get("shelved_image_id") # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, "get_image_info", instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _("Unshelve attempted but the image %s " "cannot be found.") % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException(instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, "schedule_instances", instance.uuid): filter_properties = {} scheduler_utils.populate_retry(filter_properties, instance.uuid) request_spec = scheduler_utils.build_request_spec(context, image, [instance]) hosts = self._schedule_instances(context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties(filter_properties, host_state) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node ) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE("Unshelve attempted but vm_state not SHELVED or " "SHELVED_OFFLOADED"), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref image = compute_utils.get_image_metadata( context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec( context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) scheduler_utils.setup_instance_group(context, request_spec, filter_properties) try: scheduler_utils.populate_retry(filter_properties, instance['uuid']) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance['vm_state'] if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor['id'] == instance['instance_type_id']: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec['instance_type'].pop('extra_specs', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': instance['vm_state'], 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback()
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id="") if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) snapshot_id = sys_meta.get("shelved_image_id") if snapshot_id: self._delete_image(context, snapshot_id) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image_id = sys_meta.get("shelved_image_id") with compute_utils.EventReporter(context, "get_image_info", instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() if image_id: reason = _("Unshelve attempted but the image %s " "cannot be found.") % image_id else: reason = _("Unshelve attempted but the image_id is " "not provided") LOG.error(reason, instance=instance) raise exception.UnshelveException(instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, "schedule_instances", instance.uuid): filter_properties = {} hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties(filter_properties, host_state) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node ) except exception.NoValidHost: instance.task_state = None instance.save() LOG.warning(_("No valid host found for unshelve instance"), instance=instance) return else: LOG.error(_LE("Unshelve attempted but vm_state not SHELVED or " "SHELVED_OFFLOADED"), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return for key in ["shelved_at", "shelved_image_id", "shelved_host"]: if key in sys_meta: del (sys_meta[key]) instance.system_metadata = sys_meta instance.save()
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations, clean_shutdown): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) scheduler_utils.populate_retry(filter_properties, instance["uuid"]) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor["id"] == instance["instance_type_id"]: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback() try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop("context", None) (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, clean_shutdown=clean_shutdown, ) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {"vm_state": instance.vm_state, "task_state": None} self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec) quotas.rollback()
def _test_populate_filter_props(self, host_state_obj=True, with_retry=True, force_hosts=None, force_nodes=None): if force_hosts is None: force_hosts = [] if force_nodes is None: force_nodes = [] if with_retry: if ((len(force_hosts) == 1 and len(force_nodes) <= 1) or (len(force_nodes) == 1 and len(force_hosts) <= 1)): filter_properties = dict(force_hosts=force_hosts, force_nodes=force_nodes) elif len(force_hosts) > 1 or len(force_nodes) > 1: filter_properties = dict(retry=dict(hosts=[]), force_hosts=force_hosts, force_nodes=force_nodes) else: filter_properties = dict(retry=dict(hosts=[])) else: filter_properties = dict() if host_state_obj: class host_state(object): host = 'fake-host' nodename = 'fake-node' limits = 'fake-limits' else: host_state = dict(host='fake-host', nodename='fake-node', limits='fake-limits') scheduler_utils.populate_filter_properties(filter_properties, host_state) enable_retry_force_hosts = not force_hosts or len(force_hosts) > 1 enable_retry_force_nodes = not force_nodes or len(force_nodes) > 1 if with_retry or enable_retry_force_hosts or enable_retry_force_nodes: # So we can check for 2 hosts scheduler_utils.populate_filter_properties(filter_properties, host_state) if force_hosts: expected_limits = None else: expected_limits = 'fake-limits' self.assertEqual(expected_limits, filter_properties.get('limits')) if (with_retry and enable_retry_force_hosts and enable_retry_force_nodes): self.assertEqual([['fake-host', 'fake-node'], ['fake-host', 'fake-node']], filter_properties['retry']['hosts']) else: self.assertNotIn('retry', filter_properties)
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec(context, image, instances) scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList( objects=[objects.NetworkRequest.from_tuple(t) for t in requested_networks]) try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) except Exception as exc: for instance in instances: scheduler_driver.handle_schedule_error(context, exc, instance.uuid, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) snapshot_id = sys_meta.get('shelved_image_id') if snapshot_id: self._delete_image(context, snapshot_id) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: try: with compute_utils.EventReporter(context, self.db, 'get_image_info', instance.uuid): image = self._get_image(context, sys_meta['shelved_image_id']) except exception.ImageNotFound: with excutils.save_and_reraise_exception(): LOG.error(_('Unshelve attempted but vm_state not SHELVED ' 'or SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() try: with compute_utils.EventReporter(context, self.db, 'schedule_instances', instance.uuid): filter_properties = {} hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except exception.NoValidHost as ex: instance.task_state = None instance.save() LOG.warning(_("No valid host found for unshelve instance"), instance=instance) return else: LOG.error(_('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return for key in ['shelved_at', 'shelved_image_id', 'shelved_host']: if key in sys_meta: del(sys_meta[key]) instance.system_metadata = sys_meta instance.save()
def test_post_select_populate(self): # Test addition of certain filter props after a node is selected. retry = {'hosts': [], 'num_attempts': 1} filter_properties = {'retry': retry} selection = objects.Selection(service_host="host", nodename="node", cell_uuid=uuids.cell) scheduler_utils.populate_filter_properties(filter_properties, selection) self.assertEqual(['host', 'node'], filter_properties['retry']['hosts'][0])
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref if image_ref: image = self._get_image(context, image_ref) else: image = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance], instance_type=flavor) try: hosts = self.scheduler_rpcapi.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance['vm_state'] if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) if reservations: self.quotas.rollback(context, reservations) LOG.warning(_("No valid host found for cold migrate")) return try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec['instance_type'].pop('extra_specs') (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': vm_states.ERROR, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) if reservations: self.quotas.rollback(context, reservations)
def test_post_select_populate(self): # Test addition of certain filter props after a node is selected. retry = {"hosts": [], "num_attempts": 1} filter_properties = {"retry": retry} host_state = host_manager.HostState("host", "node") host_state.limits["vcpus"] = 5 scheduler_utils.populate_filter_properties(filter_properties, host_state) self.assertEqual(["host", "node"], filter_properties["retry"]["hosts"][0]) self.assertEqual({"vcpus": 5}, host_state.limits)
def test_post_select_populate(self): # Test addition of certain filter props after a node is selected. retry = {'hosts': [], 'num_attempts': 1} filter_properties = {'retry': retry} host_state = host_manager.HostState('host', 'node') host_state.limits['vcpus'] = 5 scheduler_utils.populate_filter_properties(filter_properties, host_state) self.assertEqual(['host', 'node'], filter_properties['retry']['hosts'][0]) self.assertEqual({'vcpus': 5}, host_state.limits)
def _execute(self): image = self.request_spec.image self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # TODO(sbauza): Remove that RequestSpec rehydratation once # scheduler.utils methods use directly the NovaObject. self.request_spec = objects.RequestSpec.from_components( self.context, self.instance.uuid, image, self.flavor, self.instance.numa_topology, self.instance.pci_requests, legacy_props, None, self.instance.availability_zone) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() hosts = self.scheduler_client.select_destinations( self.context, self.request_spec) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_live_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _provision_resource( self, context, weighed_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=None, legacy_bdm_in_spec=True, ): """Create the requested resource in this Zone.""" # NOTE(vish): add our current instance back into the request spec request_spec["instance_uuids"] = [instance_uuid] payload = dict(request_spec=request_spec, weighted_host=weighed_host.to_dict(), instance_id=instance_uuid) self.notifier.info(context, "scheduler.run_instance.scheduled", payload) # Update the metadata if necessary scheduler_hints = filter_properties.get("scheduler_hints") or {} group = scheduler_hints.get("group", None) values = None if group: values = request_spec["instance_properties"]["system_metadata"] values.update({"group": group}) values = {"system_metadata": values} try: updated_instance = driver.instance_update_db(context, instance_uuid, extra_values=values) except exception.InstanceNotFound: LOG.warning(_("Instance disappeared during scheduling"), context=context, instance_uuid=instance_uuid) else: scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj) self.compute_rpcapi.run_instance( context, instance=updated_instance, host=weighed_host.obj.host, request_spec=request_spec, filter_properties=filter_properties, requested_networks=requested_networks, injected_files=injected_files, admin_password=admin_password, is_first_time=is_first_time, node=weighed_host.obj.nodename, legacy_bdm_in_spec=legacy_bdm_in_spec, )
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = quotas_obj.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.populate_retry(filter_properties, instance["uuid"]) hosts = self.scheduler_rpcapi.select_destinations(context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance["vm_state"] if not vm_state: vm_state = vm_states.ACTIVE updates = {"vm_state": vm_state, "task_state": None} self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec) quotas.rollback() msg = _("No valid host found for cold migrate") raise exception.NoValidHost(reason=msg) try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop("context", None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec["instance_type"].pop("extra_specs") (host, node) = (host_state["host"], host_state["nodename"]) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, ) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {"vm_state": instance["vm_state"], "task_state": None} self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec) quotas.rollback()
def prep_resize(self, context, image, request_spec, filter_properties, instance, instance_type, reservations): """Tries to call schedule_prep_resize on the driver. Sets instance vm_state to ACTIVE on NoHostFound Sets vm_state to ERROR on other exceptions """ instance_uuid = instance['uuid'] with compute_utils.EventReporter(context, conductor_api.LocalAPI(), 'schedule', instance_uuid): try: request_spec['num_instances'] = len( request_spec['instance_uuids']) hosts = self.driver.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, instance_type, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except exception.NoValidHost as ex: vm_state = instance.get('vm_state', vm_states.ACTIVE) self._set_vm_state_and_notify('prep_resize', { 'vm_state': vm_state, 'task_state': None }, context, ex, request_spec) if reservations: QUOTAS.rollback(context, reservations) except Exception as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify('prep_resize', { 'vm_state': vm_states.ERROR, 'task_state': None }, context, ex, request_spec) if reservations: QUOTAS.rollback(context, reservations)
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.get('image_ref') if image_ref: image = self._get_image(context, image_ref) else: image = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) try: hosts = self.scheduler_rpcapi.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: updates = {'vm_state': vm_states.ACTIVE, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) if reservations: self.quotas.rollback(context, reservations) LOG.warning(_("No valid host found for cold migrate")) return try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec['instance_type'].pop('extra_specs') (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': vm_states.ERROR, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) if reservations: self.quotas.rollback(context, reservations)
def _test_populate_filter_props(self, host_state_obj=True, with_retry=True, force_hosts=None, force_nodes=None): if force_hosts is None: force_hosts = [] if force_nodes is None: force_nodes = [] if with_retry: if not force_hosts and not force_nodes: filter_properties = dict(retry=dict(hosts=[])) else: filter_properties = dict(force_hosts=force_hosts, force_nodes=force_nodes) else: filter_properties = dict() if host_state_obj: class host_state(object): host = 'fake-host' nodename = 'fake-node' limits = 'fake-limits' else: host_state = dict(host='fake-host', nodename='fake-node', limits='fake-limits') scheduler_utils.populate_filter_properties(filter_properties, host_state) if with_retry and not force_hosts and not force_nodes: # So we can check for 2 hosts scheduler_utils.populate_filter_properties(filter_properties, host_state) if force_hosts: expected_limits = None else: expected_limits = 'fake-limits' self.assertEqual(expected_limits, filter_properties.get('limits')) if with_retry and not force_hosts and not force_nodes: self.assertEqual( [['fake-host', 'fake-node'], ['fake-host', 'fake-node']], filter_properties['retry']['hosts']) else: self.assertNotIn('retry', filter_properties)
def prep_resize(self, context, image, request_spec, filter_properties, instance, instance_type, reservations): """Tries to call schedule_prep_resize on the driver. Sets instance vm_state to ACTIVE on NoHostFound Sets vm_state to ERROR on other exceptions """ instance_uuid = instance['uuid'] with compute_utils.EventReporter(context, conductor_api.LocalAPI(), 'schedule', instance_uuid): try: request_spec['num_instances'] = len( request_spec['instance_uuids']) hosts = self.driver.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) attrs = ['metadata', 'system_metadata', 'info_cache', 'security_groups'] inst_obj = instance_obj.Instance._from_db_object( context, instance_obj.Instance(), instance, expected_attrs=attrs) self.compute_rpcapi.prep_resize( context, image, inst_obj, instance_type, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except exception.NoValidHost as ex: vm_state = instance.get('vm_state', vm_states.ACTIVE) self._set_vm_state_and_notify('prep_resize', {'vm_state': vm_state, 'task_state': None}, context, ex, request_spec) if reservations: QUOTAS.rollback(context, reservations) except Exception as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify('prep_resize', {'vm_state': vm_states.ERROR, 'task_state': None}, context, ex, request_spec) if reservations: QUOTAS.rollback(context, reservations)
def _execute(self): image = self.request_spec.image self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, legacy_spec, legacy_props) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # TODO(sbauza): Remove that RequestSpec rehydratation once # scheduler.utils methods use directly the NovaObject. self.request_spec = objects.RequestSpec.from_components( self.context, self.instance.uuid, image, self.flavor, self.instance.numa_topology, self.instance.pci_requests, legacy_props, None, self.instance.availability_zone) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() hosts = self.scheduler_client.select_destinations( self.context, self.request_spec) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def _execute(self): image = self.request_spec.get('image') self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance) scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties) scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid) # TODO(sbauza): Hydrate here the object until we modify the # scheduler.utils methods to directly use the RequestSpec object spec_obj = objects.RequestSpec.from_primitives( self.context, self.request_spec, self.filter_properties) hosts = self.scheduler_client.select_destinations( self.context, spec_obj) LOG.info('TTTType hosts:%s' % hosts) host_state = hosts[0] scheduler_utils.populate_filter_properties(self.filter_properties, host_state) # context is not serializable self.filter_properties.pop('context', None) #(host, node) = (host_state['host'], host_state['nodename']) LOG.info('TTTType host_state:%s' % host_state) # add by jiahua #(host, node) = (self.filter_properties['destination_host_name'], # self.filter_properties['destination_host_name']) # specify host use specify host, else use localhost #node = self.filter_properties['destination_host_name'] if self.filter_properties['destination_host_name'] \ # else host_state['nodename'] #host = self.filter_properties['destination_host_name'] if self.filter_properties['destination_host_name'] \ # else host_state['host'] node = self.filter_properties['destination_host_name'] if self.filter_properties['destination_host_name'] \ else self.filter_properties['instance_host_name'] host = self.filter_properties['destination_host_name'] if self.filter_properties['destination_host_name'] \ else self.filter_properties['instance_host_name'] LOG.info('TTTType node:%s' % node) LOG.info('TTTType host:%s' % host) self.compute_rpcapi.prep_resize( self.context, image, self.instance, self.flavor, host, self.reservations, request_spec=self.request_spec, filter_properties=self.filter_properties, node=node, clean_shutdown=self.clean_shutdown)
def _provision_resource(self, context, selected_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=None): """Create the requested resource in this Zone.""" # NOTE(vish): add our current instance back into the request spec request_spec['instance_uuids'] = [instance_uuid] #NOTE(Yathi): Not using weights in solver scheduler, #but leaving in the payload payload = dict(request_spec=request_spec, weighted_host=dict(host=selected_host.host, weight=1), instance_id=instance_uuid) notifier.notify(context, notifier.publisher_id("scheduler"), 'scheduler.run_instance.scheduled', notifier.INFO, payload) # Update the metadata if necessary scheduler_hints = filter_properties.get('scheduler_hints') or {} group = scheduler_hints.get('group', None) values = None if group: values = request_spec['instance_properties']['system_metadata'] values.update({'group': group}) values = {'system_metadata': values} try: updated_instance = driver.instance_update_db(context, instance_uuid, extra_values=values) except exception.InstanceNotFound: LOG.warning(_("Instance disappeared during scheduling"), context=context, instance_uuid=instance_uuid) else: scheduler_utils.populate_filter_properties(filter_properties, selected_host) self.compute_rpcapi.run_instance(context, instance=updated_instance, host=selected_host.host, request_spec=request_spec, filter_properties=filter_properties, requested_networks=requested_networks, injected_files=injected_files, admin_password=admin_password, is_first_time=is_first_time, node=selected_host.nodename)
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec(context, image, instances) try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_rpcapi.select_destinations(context, request_spec, filter_properties) except Exception as exc: for instance in instances: scheduler_driver.handle_schedule_error(context, exc, instance.uuid, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _provision_resource(self, context, weighed_host, request_spec, filter_properties, requested_networks, injected_files, admin_password, is_first_time, instance_uuid=None, legacy_bdm_in_spec=True): """Create the requested resource in this Zone.""" # NOTE(vish): add our current instance back into the request spec request_spec['instance_uuids'] = [instance_uuid] payload = dict(request_spec=request_spec, weighted_host=weighed_host.to_dict(), instance_id=instance_uuid) self.notifier.info(context, 'scheduler.run_instance.scheduled', payload) # Update the metadata if necessary scheduler_hints = filter_properties.get('scheduler_hints') or {} try: updated_instance = driver.instance_update_db( context, instance_uuid) except exception.InstanceNotFound: LOG.warning(_("Instance disappeared during scheduling"), context=context, instance_uuid=instance_uuid) else: scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj) self.compute_rpcapi.run_instance( context, instance=updated_instance, host=weighed_host.obj.host, request_spec=request_spec, filter_properties=filter_properties, requested_networks=requested_networks, injected_files=injected_files, admin_password=admin_password, is_first_time=is_first_time, node=weighed_host.obj.nodename, legacy_bdm_in_spec=legacy_bdm_in_spec)
def _test_populate_filter_props(self, selection_obj=True, with_retry=True, force_hosts=None, force_nodes=None, no_limits=None): if force_hosts is None: force_hosts = [] if force_nodes is None: force_nodes = [] if with_retry: if ((len(force_hosts) == 1 and len(force_nodes) <= 1) or (len(force_nodes) == 1 and len(force_hosts) <= 1)): filter_properties = dict(force_hosts=force_hosts, force_nodes=force_nodes) elif len(force_hosts) > 1 or len(force_nodes) > 1: filter_properties = dict(retry=dict(hosts=[]), force_hosts=force_hosts, force_nodes=force_nodes) else: filter_properties = dict(retry=dict(hosts=[])) else: filter_properties = dict() if no_limits: fake_limits = None else: fake_limits = objects.SchedulerLimits(vcpu=1, disk_gb=2, memory_mb=3, numa_topology=None) selection = objects.Selection(service_host="fake-host", nodename="fake-node", limits=fake_limits) if not selection_obj: selection = selection.to_dict() fake_limits = fake_limits.to_dict() scheduler_utils.populate_filter_properties(filter_properties, selection) enable_retry_force_hosts = not force_hosts or len(force_hosts) > 1 enable_retry_force_nodes = not force_nodes or len(force_nodes) > 1 if with_retry or enable_retry_force_hosts or enable_retry_force_nodes: # So we can check for 2 hosts scheduler_utils.populate_filter_properties(filter_properties, selection) if force_hosts: expected_limits = None elif no_limits: expected_limits = {} elif isinstance(fake_limits, objects.SchedulerLimits): expected_limits = fake_limits.to_dict() else: expected_limits = fake_limits self.assertEqual(expected_limits, filter_properties.get('limits')) if (with_retry and enable_retry_force_hosts and enable_retry_force_nodes): self.assertEqual([['fake-host', 'fake-node'], ['fake-host', 'fake-node']], filter_properties['retry']['hosts']) else: self.assertNotIn('retry', filter_properties)
def _execute(self): # TODO(sbauza): Remove once all the scheduler.utils methods accept a # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # TODO(gibi): We need to make sure that the requested_resources field # is re calculated based on neutron ports. self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) self.request_spec.ensure_network_metadata(self.instance) compute_utils.heal_reqspec_is_bfv( self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.query_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient availabile # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % {"instance_uuid": self.instance.uuid}) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( # NOTE(mriedem): Using request_spec.image here is potentially # dangerous if it is not kept up to date (i.e. rebuild/unshelve); # seems like the sane thing to do would be to pass the current # instance.image_meta since that is what MoveClaim will use for # any NUMA topology claims on the destination host... self.context, self.instance, self.request_spec.image, self.flavor, host, migration, request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.populate_retry(filter_properties, instance['uuid']) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance['vm_state'] if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor['id'] == instance['instance_type_id']: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) # TODO(timello): originally, instance_type in request_spec # on compute.api.resize does not have 'extra_specs', so we # remove it for now to keep tests backward compatibility. request_spec['instance_type'].pop('extra_specs', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node) except Exception as ex: with excutils.save_and_reraise_exception(): updates = { 'vm_state': instance['vm_state'], 'task_state': None } self._set_vm_state_and_notify(context, 'migrate_server', updates, ex, request_spec) quotas.rollback()
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): filter_properties = {} scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def _cold_migrate(self, context, instance, flavor, filter_properties, reservations, clean_shutdown): image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor) quotas = objects.Quotas.from_reservations(context, reservations, instance=instance) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) scheduler_utils.populate_retry(filter_properties, instance['uuid']) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host_state = hosts[0] except exception.NoValidHost as ex: vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback() # if the flavor IDs match, it's migrate; otherwise resize if flavor['id'] == instance['instance_type_id']: msg = _("No valid host found for cold migrate") else: msg = _("No valid host found for resize") raise exception.NoValidHost(reason=msg) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): vm_state = instance.vm_state if not vm_state: vm_state = vm_states.ACTIVE updates = {'vm_state': vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback() try: scheduler_utils.populate_filter_properties(filter_properties, host_state) # context is not serializable filter_properties.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.prep_resize( context, image, instance, flavor, host, reservations, request_spec=request_spec, filter_properties=filter_properties, node=node, clean_shutdown=clean_shutdown) except Exception as ex: with excutils.save_and_reraise_exception(): updates = {'vm_state': instance.vm_state, 'task_state': None} self._set_vm_state_and_notify(context, instance.uuid, 'migrate_server', updates, ex, request_spec) quotas.rollback()
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList(objects=[ objects.NetworkRequest.from_tuple(t) for t in requested_networks ]) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) request_spec = {} try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) request_spec = scheduler_utils.build_request_spec( context, image, instances) hosts = self._schedule_instances(context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify(context, instance.uuid, 'build_instances', updates, exc, request_spec) self._cleanup_allocated_networks(context, instance, requested_networks) return host_hypervisor = '' hosts_info = [] reselect_flag = self.need_select_image(request_spec) if reselect_flag: # Normal user need promoted privilege to search db elevated = context.elevated() hosts_info = db.compute_node_get_all(elevated) LOG.debug("hosts_info: {0}".format(hosts_info)) for (instance, host) in six.moves.zip(instances, hosts): if reselect_flag: for hi in hosts_info: if hi.get('service') and hi['service'].get( 'host') == host['host']: host_hypervisor = hi.get('hypervisor_type') LOG.debug( 'host_hypervisor: {0}'.format(host_hypervisor)) break image, instance, request_spec = self.select_image( context, image, host_hypervisor, instance, request_spec, filter_properties) LOG.debug("Final image: {0}".format(image.get('id'))) try: instance.save() instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance( context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def schedule_and_build_instances(self, context, build_requests, request_specs, image, admin_password, injected_files, requested_networks, block_device_mapping): legacy_spec = request_specs[0].to_legacy_request_spec_dict() try: hosts = self._schedule_instances( context, legacy_spec, request_specs[0].to_legacy_filter_properties_dict()) except Exception as exc: LOG.exception(_LE('Failed to schedule instances')) self._bury_in_cell0(context, request_specs[0], exc, build_requests=build_requests) return host_mapping_cache = {} for (build_request, request_spec, host) in six.moves.zip(build_requests, request_specs, hosts): filter_props = request_spec.to_legacy_filter_properties_dict() scheduler_utils.populate_filter_properties(filter_props, host) instance = build_request.get_new_instance(context) # Convert host from the scheduler into a cell record if host['host'] not in host_mapping_cache: try: host_mapping = objects.HostMapping.get_by_host( context, host['host']) host_mapping_cache[host['host']] = host_mapping except exception.HostMappingNotFound as exc: LOG.error( _LE('No host-to-cell mapping found for selected ' 'host %(host)s. Setup is incomplete.'), {'host': host['host']}) self._bury_in_cell0(context, request_spec, exc, build_requests=[build_request], instances=[instance]) continue else: host_mapping = host_mapping_cache[host['host']] cell = host_mapping.cell_mapping with obj_target_cell(instance, cell): instance.create() # send a state update notification for the initial create to # show it going from non-existent to BUILDING notifications.send_update_with_states(context, instance, None, vm_states.BUILDING, None, None, service="conductor") objects.InstanceAction.action_start(context, instance.uuid, instance_actions.CREATE, want_result=False) with obj_target_cell(instance, cell): instance_bdms = self._create_block_device_mapping( instance.flavor, instance.uuid, block_device_mapping) # Update mapping for instance. Normally this check is guarded by # a try/except but if we're here we know that a newer nova-api # handled the build process and would have created the mapping inst_mapping = objects.InstanceMapping.get_by_instance_uuid( context, instance.uuid) inst_mapping.cell_mapping = cell inst_mapping.save() try: build_request.destroy() except exception.BuildRequestNotFound: # This indicates an instance deletion request has been # processed, and the build should halt here. Clean up the # bdm and instance record. with obj_target_cell(instance, cell): try: instance.destroy() except exception.InstanceNotFound: pass except exception.ObjectActionError: # NOTE(melwitt): Instance became scheduled during # the destroy, "host changed". Refresh and re-destroy. try: instance.refresh() instance.destroy() except exception.InstanceNotFound: pass for bdm in instance_bdms: with obj_target_cell(bdm, cell): try: bdm.destroy() except exception.ObjectActionError: pass return # NOTE(danms): Compute RPC expects security group names or ids # not objects, so convert this to a list of names until we can # pass the objects. legacy_secgroups = [ s.identifier for s in request_spec.security_groups ] with obj_target_cell(instance, cell): self.compute_rpcapi.build_and_run_instance( context, instance=instance, image=image, request_spec=request_spec, filter_properties=filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=legacy_secgroups, block_device_mapping=instance_bdms, host=host['host'], node=host['nodename'], limits=host['limits'])
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) migration = self._preallocate_migration() hosts = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid]) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, migration, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec( context, image, instances) # NOTE(sbauza): filter_properties['hints'] can be None hints = filter_properties.get('scheduler_hints', {}) or {} group_hint = hints.get('group') group_hosts = filter_properties.get('group_hosts') group_info = scheduler_utils.setup_instance_group( context, group_hint, group_hosts) if isinstance(group_info, tuple): filter_properties['group_updated'] = True (filter_properties['group_hosts'], filter_properties['group_policies']) = group_info # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList(objects=[ objects.NetworkRequest.from_tuple(t) for t in requested_networks ]) try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. # (luzhq) 验证重试策略 # 更新filter_properties中的重试属性,若当前为重试部署则同时检测当前 # 的重试次数是否超过最大重试次数,需要注意的是:这里使用instances[0] # 表示如重试的话只会有一个instance重试 scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) except Exception as exc: for instance in instances: scheduler_driver.handle_schedule_error(context, exc, instance.uuid, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) # (luzhq) self.compute_rpcapi = compute_rpcapi.ComputeAPI() self.compute_rpcapi.build_and_run_instance( context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def unshelve_instance(self, context, instance, request_spec=None): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter( context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is # old. We need to mock that the old way filter_properties = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) else: # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances(), # populate_filter_properties and populate_retry() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.\ to_legacy_request_spec_dict() scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances( context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) # NOTE(takashin): In the case that the target host is specified, # if the migration is failed, it is not necessary to retry # the cold migration to the same host. So make sure that # reschedule will not occur. if 'host' in self.request_spec.requested_destination: legacy_props.pop('retry', None) self.request_spec.retry = None else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) migration = self._preallocate_migration() self.request_spec.ensure_project_id(self.instance) # For now, don't request alternates. A later patch in the series will # modify migration to use alternates instead of calling the scheduler # again. selection_lists = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=False) # We only need the first item in the first list, as there is only one # instance, and we don't care about any alternates. selection = selection_lists[0][0] scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, migration, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown)
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList.from_tuples( requested_networks) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) request_spec = {} try: # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. request_spec = scheduler_utils.build_request_spec( context, image, instances) scheduler_utils.populate_retry( filter_properties, instances[0].uuid) hosts = self._schedule_instances( context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) try: # If the BuildRequest stays around then instance show/lists # will pull from it rather than the errored instance. self._destroy_build_request(context, instance) except exception.BuildRequestNotFound: pass self._cleanup_allocated_networks( context, instance, requested_networks) return for (instance, host) in six.moves.zip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) # This is populated in scheduler_utils.populate_retry num_attempts = local_filter_props.get('retry', {}).get('num_attempts', 1) if num_attempts <= 1: # If this is a reschedule the instance is already mapped to # this cell and the BuildRequest is already deleted so ignore # the logic below. inst_mapping = self._populate_instance_mapping(context, instance, host) try: self._destroy_build_request(context, instance) except exception.BuildRequestNotFound: # This indicates an instance delete has been requested in # the API. Stop the build, cleanup the instance_mapping and # potentially the block_device_mappings # TODO(alaski): Handle block_device_mapping cleanup if inst_mapping: inst_mapping.destroy() return self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) # NOTE(takashin): In the case that the target host is specified, # if the migration is failed, it is not necessary to retry # the cold migration to the same host. So make sure that # reschedule will not occur. if 'host' in self.request_spec.requested_destination: legacy_props.pop('retry', None) self.request_spec.retry = None else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient availabile # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % { "instance_uuid": self.instance.uuid }) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) LOG.debug( "Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, migration, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def unshelve_instance(self, context, instance, request_spec=None): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter(context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is # old. We need to mock that the old way filter_properties = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) else: # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances(), # populate_filter_properties and populate_retry() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.\ to_legacy_request_spec_dict() scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances(context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def schedule_and_build_instances(self, context, build_requests, request_specs, image, admin_password, injected_files, requested_networks, block_device_mapping): legacy_spec = request_specs[0].to_legacy_request_spec_dict() try: hosts = self._schedule_instances(context, legacy_spec, request_specs[0].to_legacy_filter_properties_dict()) except Exception as exc: LOG.exception(_LE('Failed to schedule instances')) self._bury_in_cell0(context, request_specs[0], exc, build_requests=build_requests) return host_mapping_cache = {} for (build_request, request_spec, host) in six.moves.zip( build_requests, request_specs, hosts): filter_props = request_spec.to_legacy_filter_properties_dict() instance = build_request.get_new_instance(context) scheduler_utils.populate_retry(filter_props, instance.uuid) scheduler_utils.populate_filter_properties(filter_props, host) # Convert host from the scheduler into a cell record if host['host'] not in host_mapping_cache: try: host_mapping = objects.HostMapping.get_by_host( context, host['host']) host_mapping_cache[host['host']] = host_mapping except exception.HostMappingNotFound as exc: LOG.error(_LE('No host-to-cell mapping found for selected ' 'host %(host)s. Setup is incomplete.'), {'host': host['host']}) self._bury_in_cell0(context, request_spec, exc, build_requests=[build_request], instances=[instance]) continue else: host_mapping = host_mapping_cache[host['host']] cell = host_mapping.cell_mapping # Before we create the instance, let's make one final check that # the build request is still around and wasn't deleted by the user # already. try: objects.BuildRequest.get_by_instance_uuid( context, instance.uuid) except exception.BuildRequestNotFound: # the build request is gone so we're done for this instance LOG.debug('While scheduling instance, the build request ' 'was already deleted.', instance=instance) continue else: with obj_target_cell(instance, cell): instance.create() # send a state update notification for the initial create to # show it going from non-existent to BUILDING notifications.send_update_with_states(context, instance, None, vm_states.BUILDING, None, None, service="conductor") with obj_target_cell(instance, cell): objects.InstanceAction.action_start( context, instance.uuid, instance_actions.CREATE, want_result=False) instance_bdms = self._create_block_device_mapping( instance.flavor, instance.uuid, block_device_mapping) # Update mapping for instance. Normally this check is guarded by # a try/except but if we're here we know that a newer nova-api # handled the build process and would have created the mapping inst_mapping = objects.InstanceMapping.get_by_instance_uuid( context, instance.uuid) inst_mapping.cell_mapping = cell inst_mapping.save() if not self._delete_build_request( context, build_request, instance, cell, instance_bdms): # The build request was deleted before/during scheduling so # the instance is gone and we don't have anything to build for # this one. continue # NOTE(danms): Compute RPC expects security group names or ids # not objects, so convert this to a list of names until we can # pass the objects. legacy_secgroups = [s.identifier for s in request_spec.security_groups] with obj_target_cell(instance, cell): self.compute_rpcapi.build_and_run_instance( context, instance=instance, image=image, request_spec=request_spec, filter_properties=filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=legacy_secgroups, block_device_mapping=instance_bdms, host=host['host'], node=host['nodename'], limits=host['limits'])
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec(context, image, instances) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList( objects=[objects.NetworkRequest.from_tuple(t) for t in requested_networks]) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance(context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _execute(self): # NOTE(sbauza): Force_hosts/nodes needs to be reset if we want to make # sure that the next destination is not forced to be the original host. # This needs to be done before the populate_retry call otherwise # retries will be disabled if the server was created with a forced # host/node. self.request_spec.reset_forced_destinations() # TODO(sbauza): Remove once all the scheduler.utils methods accept a # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) port_res_req = self.network_api.get_requested_resource_for_instance( self.context, self.instance.uuid) # NOTE(gibi): When cyborg or other module wants to handle similar # non-nova resources then here we have to collect all the external # resource requests in a single list and add them to the RequestSpec. self.request_spec.requested_resources = port_res_req self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) self.request_spec.ensure_network_metadata(self.instance) compute_utils.heal_reqspec_is_bfv( self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection = self._schedule() else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. selection = self._reschedule() scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) # The availability_zone field was added in v1.1 of the Selection # object so make sure to handle the case where it is missing. if 'availability_zone' in selection: self.instance.availability_zone = selection.availability_zone else: self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( # NOTE(mriedem): Using request_spec.image here is potentially # dangerous if it is not kept up to date (i.e. rebuild/unshelve); # seems like the sane thing to do would be to pass the current # instance.image_meta since that is what MoveClaim will use for # any NUMA topology claims on the destination host... self.context, self.instance, self.request_spec.image, self.flavor, host, migration, request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def _execute(self): # TODO(sbauza): Remove once all the scheduler.utils methods accept a # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # TODO(gibi): We need to make sure that the requested_resources field # is re calculated based on neutron ports. self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) self.request_spec.ensure_network_metadata(self.instance) compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec, self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.query_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient availabile # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % { "instance_uuid": self.instance.uuid }) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) LOG.debug( "Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( # NOTE(mriedem): Using request_spec.image here is potentially # dangerous if it is not kept up to date (i.e. rebuild/unshelve); # seems like the sane thing to do would be to pass the current # instance.image_meta since that is what MoveClaim will use for # any NUMA topology claims on the destination host... self.context, self.instance, self.request_spec.image, self.flavor, host, migration, request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) snapshot_id = sys_meta.get('shelved_image_id') if snapshot_id: self._delete_image(context, snapshot_id) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: try: with compute_utils.EventReporter(context, self.db, 'get_image_info', instance.uuid): image = self._get_image(context, sys_meta['shelved_image_id']) except exception.ImageNotFound: with excutils.save_and_reraise_exception(): LOG.error(_('Unshelve attempted but vm_state not SHELVED ' 'or SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() try: with compute_utils.EventReporter(context, self.db, 'schedule_instances', instance.uuid): filter_properties = {} hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except exception.NoValidHost as ex: instance.task_state = None instance.save() LOG.warning(_("No valid host found for unshelve instance"), instance=instance) return else: LOG.error(_('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return for key in ['shelved_at', 'shelved_image_id', 'shelved_host']: if key in sys_meta: del (sys_meta[key]) instance.system_metadata = sys_meta instance.save()
def build_instances(self, context, instances, image, filter_properties, admin_password, injected_files, requested_networks, security_groups, block_device_mapping=None, legacy_bdm=True): # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version # 2.0 of the RPC API. request_spec = scheduler_utils.build_request_spec( context, image, instances) # TODO(danms): Remove this in version 2.0 of the RPC API if (requested_networks and not isinstance(requested_networks, objects.NetworkRequestList)): requested_networks = objects.NetworkRequestList(objects=[ objects.NetworkRequest.from_tuple(t) for t in requested_networks ]) # TODO(melwitt): Remove this in version 2.0 of the RPC API flavor = filter_properties.get('instance_type') if flavor and not isinstance(flavor, objects.Flavor): # Code downstream may expect extra_specs to be populated since it # is receiving an object, so lookup the flavor to ensure this. flavor = objects.Flavor.get_by_id(context, flavor['id']) filter_properties = dict(filter_properties, instance_type=flavor) try: scheduler_utils.setup_instance_group(context, request_spec, filter_properties) # check retry policy. Rather ugly use of instances[0]... # but if we've exceeded max retries... then we really only # have a single instance. scheduler_utils.populate_retry(filter_properties, instances[0].uuid) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) except Exception as exc: updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify(context, instance.uuid, 'build_instances', updates, exc, request_spec) return for (instance, host) in itertools.izip(instances, hosts): try: instance.refresh() except (exception.InstanceNotFound, exception.InstanceInfoCacheNotFound): LOG.debug('Instance deleted during build', instance=instance) continue local_filter_props = copy.deepcopy(filter_properties) scheduler_utils.populate_filter_properties(local_filter_props, host) # The block_device_mapping passed from the api doesn't contain # instance specific information bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) self.compute_rpcapi.build_and_run_instance( context, instance=instance, host=host['host'], image=image, request_spec=request_spec, filter_properties=local_filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=security_groups, block_device_mapping=bdms, node=host['nodename'], limits=host['limits'])
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, # 'populate_retry' need not be executed. if not ('requested_destination' in self.request_spec and self.request_spec.requested_destination and 'host' in self.request_spec.requested_destination): scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) # NOTE(takashin): In the case that the target host is specified, # if the migration is failed, it is not necessary to retry # the cold migration to the same host. So make sure that # reschedule will not occur. if 'host' in self.request_spec.requested_destination: legacy_props.pop('retry', None) self.request_spec.retry = None else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) # Once _preallocate_migration() is done, the source node allocation is # moved from the instance consumer to the migration record consumer, # and the instance consumer doesn't have any allocations. If this is # the first time through here (not a reschedule), select_destinations # below will allocate resources on the selected destination node for # the instance consumer. If we're rescheduling, host_list is not None # and we'll call claim_resources for the instance and the selected # alternate. If we exhaust our alternates and raise MaxRetriesExceeded, # the rollback() method should revert the allocation swaparoo and move # the source node allocation from the migration record back to the # instance record. migration = self._preallocate_migration() self.request_spec.ensure_project_and_user_id(self.instance) # On an initial call to migrate, 'self.host_list' will be None, so we # have to call the scheduler to get a list of acceptable hosts to # migrate to. That list will consist of a selected host, along with # zero or more alternates. On a reschedule, though, the alternates will # be passed to this object and stored in 'self.host_list', so we can # pop the first alternate from the list to use for the destination, and # pass the remaining alternates to the compute. if self.host_list is None: selection_lists = self.scheduler_client.select_destinations( self.context, self.request_spec, [self.instance.uuid], return_objects=True, return_alternates=True) # Since there is only ever one instance to migrate per call, we # just need the first returned element. selection_list = selection_lists[0] # The selected host is the first item in the list, with the # alternates being the remainder of the list. selection, self.host_list = selection_list[0], selection_list[1:] else: # This is a reschedule that will use the supplied alternate hosts # in the host_list as destinations. Since the resources on these # alternates may have been consumed and might not be able to # support the migrated instance, we need to first claim the # resources to verify the host still has sufficient availabile # resources. elevated = self.context.elevated() host_available = False while self.host_list and not host_available: selection = self.host_list.pop(0) if selection.allocation_request: alloc_req = jsonutils.loads(selection.allocation_request) else: alloc_req = None if alloc_req: # If this call succeeds, the resources on the destination # host will be claimed by the instance. host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, selection.allocation_request_version) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an # allocation_request to claim with. For those cases, # there is no concept of claiming, so just assume that # the host is valid. host_available = True # There are no more available hosts. Raise a MaxRetriesExceeded # exception in that case. if not host_available: reason = ("Exhausted all hosts available for retrying build " "failures for instance %(instance_uuid)s." % {"instance_uuid": self.instance.uuid}) raise exception.MaxRetriesExceeded(reason=reason) scheduler_utils.populate_filter_properties(legacy_props, selection) # context is not serializable legacy_props.pop('context', None) (host, node) = (selection.service_host, selection.nodename) self.instance.availability_zone = ( availability_zones.get_host_availability_zone( self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( self.context, self.instance, legacy_spec['image'], self.flavor, host, migration, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list)
def reschedule(self, context, instance): """Rescheduler the given instance.""" # if reschedule process rebuild, instance is possibly in REBUILD- task # state for a long time, or just stuck in the task state, while a # second reschedule request triggered if vm status is ERROR or the host # is fault, we should ignore this kind of duplicated request if instance['task_state'] in (task_states.REBUILDING, task_states.REBUILD_BLOCK_DEVICE_MAPPING, task_states.REBUILD_SPAWNING, task_states.DELETING): LOG.warning(_('instance task_state is %s, ignore this request'), instance['task_state'], instance=instance) return LOG.info('reschedule instance', instance=instance) orig_image_ref = instance['image_ref'] or '' current_instance_type = flavors.extract_flavor(instance) # Ignore current host filter_properties = {'ignore_hosts': []} if not CONF.allow_reschedule_to_same_host: filter_properties['ignore_hosts'].append(instance['host']) image_ref = instance.image_ref image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance) request_spec = scheduler_utils.build_request_spec( context, image, [instance], instance_type=current_instance_type) # Get scheduler_hint info inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid( context, instance.uuid) injected_files = self.db.injected_files_get_by_instance_uuid( context, instance.uuid) request_networks = [] if inst_extra: scheduler_hints = jsonutils.loads(inst_extra.scheduler_hints or '{}') request_networks = jsonutils.loads(inst_extra.request_network or '[]') else: scheduler_hints = {} pci_requests = objects.InstancePCIRequests.\ get_by_instance_uuid_and_newness( context, instance['uuid'], False) if pci_requests: filter_properties['pci_requests'] = pci_requests filter_properties['scheduler_hints'] = scheduler_hints LOG.info("reschedule filter_properties %s", filter_properties, instance=instance) self._record_action_start(context, instance, hw_actions.RESCHEDULE) try: hosts = self._select_destinations(context, instance, request_spec, filter_properties) host_state = hosts[0]['host'] LOG.info("HA selected host %s", host_state, instance=instance) except exception.NoValidHost as ex: LOG.warning(_("No valid host found"), instance=instance) if instance['host']: self._try_local_reboot(context, instance, 'HARD') return bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) LOG.info("instance bdms %s", jsonutils.to_primitive(bdms), instance=instance) scheduler_utils.populate_filter_properties(filter_properties, hosts[0]) def _get_network_info(nw_info): pci_req_id = None if len(nw_info) > 3: pci_req_id = nw_info[3] return (nw_info[0], nw_info[1], nw_info[2], pci_req_id) request_networks = [_get_network_info(n) for n in request_networks] alive_dict = {'alive': False, 'fault': False, 'count': 0} def async_check_live(): try: self.compute_rpcapi.check_alive(context, host_state, 'nova-api') alive_dict['alive'] = True except Exception as e: LOG.error(_LE('check alive fault, host %s, %s'), host_state, e) alive_dict['fault'] = True def _loop_check(): if alive_dict['fault']: raise loopingcall.LoopingCallDone() if alive_dict['alive']: LOG.debug('compute service alive, host %s', host_state, instance=instance) raise loopingcall.LoopingCallDone() if alive_dict['count'] == 120: LOG.debug('check alive timeout, host %s', host_state, instance=instance) raise loopingcall.LoopingCallDone() alive_dict['count'] += 1 # Clear the resource of instance on the source host if instance['host'] and self.judge_branch(context, instance, request_networks): instance.task_state = task_states.REBUILDING instance.save() # in some extreme case rpc message will stack on HA dest host, to # avoid that we change rpc api build_and_run_instance to sync # 'call' instead of async 'cast', but we can't block outside # request, so use async way to call rpc method def _async_reschedule(): # check dest compute service is alive utils.spawn_n(async_check_live) timer = loopingcall.FixedIntervalLoopingCall(_loop_check) timer.start(interval=1).wait() if not alive_dict['alive']: LOG.warn( '%s compute service seems down, revert instance ' 'task state', host_state, instance=instance) instance.task_state = None instance.save() return LOG.info('reschedule instance to host %s', host_state, instance=instance) try: self.compute_rpcapi.sync_reschedule_instance( context, instance=instance, new_pass=None, injected_files=jsonutils.loads(injected_files), image_ref=image_ref, orig_image_ref=orig_image_ref, orig_sys_metadata=None, bdms=bdms, host=host_state, filter_properties=filter_properties) except Exception as e: LOG.error(_LE('reschedule call failed: %s'), e) self.db.instance_update(context, instance.uuid, task_state=None) utils.spawn_n(_async_reschedule) else: security_groups = self.db.security_group_get_by_instance( context, instance.uuid) block_device_mapping = \ self.db.block_device_mapping_get_all_by_instance( context, instance.uuid) request_spec.update({ 'block_device_mapping': block_device_mapping, 'security_group': security_groups }) # TODO(): Remove this in version 2.0 of the RPC API if (request_networks and not isinstance( request_networks, objects.NetworkRequestList)): request_networks = objects.NetworkRequestList(objects=[ objects.NetworkRequest.from_tuple(t) for t in request_networks ]) # in some extreme case rpc message will stack on HA dest host, to # avoid that we change rpc api build_and_run_instance to sync # 'call' instead of async 'cast', but we can't block outside # request, so use async way to call rpc method def _async_build_and_run_instance(): # check dest compute service is alive utils.spawn_n(async_check_live) timer = loopingcall.FixedIntervalLoopingCall(_loop_check) timer.start(interval=1).wait() if not alive_dict['alive']: LOG.warn( '%s compute service seems down, revert instance ' 'task state', host_state, instance=instance) instance.task_state = None instance.save() return LOG.info('build instance on host %s', host_state, instance=instance) self.compute_rpcapi.sync_build_and_run_instance( context, instance=instance, host=host_state, image=image, request_spec=request_spec, filter_properties=filter_properties, admin_password=None, injected_files=jsonutils.loads(injected_files), requested_networks=request_networks, security_groups=security_groups, block_device_mapping=bdms, node=host_state, limits=hosts[0]['limits']) utils.spawn_n(_async_build_and_run_instance)
def _execute(self): # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec # object in the signature and all the scheduler.utils methods too legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # WRS: add hosts to Server group host list for group members # that are migrating in progress if 'group_members' in legacy_props: metadetails = self.request_spec.instance_group['metadetails'] is_best_effort = strutils.bool_from_string( metadetails.get('wrs-sg:best_effort', 'False')) if ('anti-affinity' in self.request_spec.instance_group['policies'] and not is_best_effort): group_members = self.request_spec.instance_group['members'] for instance_uuid in group_members: filters = { 'migration_type': 'migration', 'instance_uuid': instance_uuid, 'status': [ 'queued', 'pre-migrating', 'migrating', 'post-migrating', 'finished' ] } migrations = objects.MigrationList.get_by_filters( self.context, filters) for migration in migrations: if migration['source_compute'] not in \ self.request_spec.instance_group['hosts']: self.request_spec.instance_group['hosts'].append( migration['source_compute']) if (migration['dest_compute'] and (migration['dest_compute'] not in self.request_spec.instance_group['hosts'])): self.request_spec.instance_group['hosts'].append( migration['dest_compute']) # refresh legacy_spec and legacy_props with latest request_spec legacy_spec = self.request_spec.to_legacy_request_spec_dict() legacy_props = self.\ request_spec.to_legacy_filter_properties_dict() scheduler_utils.populate_retry(legacy_props, self.instance.uuid) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host self.request_spec.reset_forced_destinations() # NOTE(danms): Right now we only support migrate to the same # cell as the current instance, so request that the scheduler # limit thusly. instance_mapping = objects.InstanceMapping.get_by_instance_uuid( self.context, self.instance.uuid) LOG.debug('Requesting cell %(cell)s while migrating', {'cell': instance_mapping.cell_mapping.identity}, instance=self.instance) if ('requested_destination' in self.request_spec and self.request_spec.requested_destination): self.request_spec.requested_destination.cell = ( instance_mapping.cell_mapping) else: self.request_spec.requested_destination = objects.Destination( cell=instance_mapping.cell_mapping) self.request_spec.ensure_project_id(self.instance) # WRS: determine offline cpus due to scaling to be used to calculate # placement service resource claim in scheduler self.request_spec.offline_cpus = \ scheduler_utils.determine_offline_cpus( self.flavor, self.instance.numa_topology) # NOTE(danms): We don't pass enough information to the scheduler to # know that we have a boot-from-volume request. # TODO(danms): We need to pass more context to the scheduler here # in order to (a) handle boot-from-volume instances, as well as # (b) know which volume provider to request resource from. request_spec_copy = self.request_spec if self.instance.is_volume_backed(): LOG.debug('Requesting zero root disk for ' 'boot-from-volume instance') # Clone this so we don't mutate the RequestSpec that was passed in request_spec_copy = self.request_spec.obj_clone() request_spec_copy.flavor.root_gb = 0 hosts = self.scheduler_client.select_destinations( self.context, request_spec_copy, [self.instance.uuid]) host_state = hosts[0] scheduler_utils.populate_filter_properties(legacy_props, host_state) # context is not serializable legacy_props.pop('context', None) (host, node) = (host_state['host'], host_state['nodename']) self.instance.availability_zone = ( availability_zones.get_host_availability_zone(self.context, host)) # FIXME(sbauza): Serialize/Unserialize the legacy dict because of # oslo.messaging #1529084 to transform datetime values into strings. # tl;dr: datetimes in dicts are not accepted as correct values by the # rpc fake driver. legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) self.compute_rpcapi.prep_resize(self.context, self.instance, legacy_spec['image'], self.flavor, host, self.reservations, request_spec=legacy_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown) # WRS: return request_spec for save to db but need to clear retry and # instance_group hosts so that next request starts cleanly self.request_spec.retry = None if self.request_spec.instance_group: self.request_spec.instance_group.hosts = [] return self.request_spec
def unshelve_instance(self, context, instance): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) snapshot_id = sys_meta.get('shelved_image_id') if snapshot_id: self._delete_image(context, snapshot_id) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image_id = sys_meta.get('shelved_image_id') with compute_utils.EventReporter(context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() if image_id: reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id else: reason = _('Unshelve attempted but the image_id is ' 'not provided') LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): filter_properties = {} hosts = self._schedule_instances(context, image, filter_properties, instance) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except exception.NoValidHost: instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return for key in ['shelved_at', 'shelved_image_id', 'shelved_host']: if key in sys_meta: del (sys_meta[key]) instance.system_metadata = sys_meta instance.save()
def schedule_and_build_instances(self, context, build_requests, request_specs, image, admin_password, injected_files, requested_networks, block_device_mapping): legacy_spec = request_specs[0].to_legacy_request_spec_dict() try: hosts = self._schedule_instances(context, legacy_spec, request_specs[0].to_legacy_filter_properties_dict()) except Exception as exc: LOG.exception(_LE('Failed to schedule instances')) self._bury_in_cell0(context, request_specs[0], exc, build_requests=build_requests) return host_mapping_cache = {} for (build_request, request_spec, host) in six.moves.zip( build_requests, request_specs, hosts): filter_props = request_spec.to_legacy_filter_properties_dict() scheduler_utils.populate_filter_properties(filter_props, host) instance = build_request.get_new_instance(context) # Convert host from the scheduler into a cell record if host['host'] not in host_mapping_cache: try: host_mapping = objects.HostMapping.get_by_host( context, host['host']) host_mapping_cache[host['host']] = host_mapping except exception.HostMappingNotFound as exc: LOG.error(_LE('No host-to-cell mapping found for selected ' 'host %(host)s. Setup is incomplete.'), {'host': host['host']}) self._bury_in_cell0(context, request_spec, exc, build_requests=[build_request], instances=[instance]) continue else: host_mapping = host_mapping_cache[host['host']] cell = host_mapping.cell_mapping with obj_target_cell(instance, cell): instance.create() # send a state update notification for the initial create to # show it going from non-existent to BUILDING notifications.send_update_with_states(context, instance, None, vm_states.BUILDING, None, None, service="conductor") objects.InstanceAction.action_start( context, instance.uuid, instance_actions.CREATE, want_result=False) with obj_target_cell(instance, cell): instance_bdms = self._create_block_device_mapping( instance.flavor, instance.uuid, block_device_mapping) # Update mapping for instance. Normally this check is guarded by # a try/except but if we're here we know that a newer nova-api # handled the build process and would have created the mapping inst_mapping = objects.InstanceMapping.get_by_instance_uuid( context, instance.uuid) inst_mapping.cell_mapping = cell inst_mapping.save() try: build_request.destroy() except exception.BuildRequestNotFound: # This indicates an instance deletion request has been # processed, and the build should halt here. Clean up the # bdm and instance record. with obj_target_cell(instance, cell): try: instance.destroy() except exception.InstanceNotFound: pass for bdm in instance_bdms: with obj_target_cell(bdm, cell): try: bdm.destroy() except exception.ObjectActionError: pass return # NOTE(danms): Compute RPC expects security group names or ids # not objects, so convert this to a list of names until we can # pass the objects. legacy_secgroups = [s.identifier for s in request_spec.security_groups] with obj_target_cell(instance, cell): self.compute_rpcapi.build_and_run_instance( context, instance=instance, image=image, request_spec=request_spec, filter_properties=filter_props, admin_password=admin_password, injected_files=injected_files, requested_networks=requested_networks, security_groups=legacy_secgroups, block_device_mapping=instance_bdms, host=host['host'], node=host['nodename'], limits=host['limits'])