    def _test_populate_filter_props(self, host_state_obj=True,
                                    with_retry=True):
        if with_retry:
            filter_properties = dict(retry=dict(hosts=[]))
        else:
            filter_properties = dict()

        if host_state_obj:
            class host_state(object):
                host = 'fake-host'
                nodename = 'fake-node'
                limits = 'fake-limits'
        else:
            host_state = dict(host='fake-host',
                              nodename='fake-node',
                              limits='fake-limits')

        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)
        if with_retry:
            # So we can check for 2 hosts
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)

        self.assertEqual('fake-limits', filter_properties['limits'])
        if with_retry:
            self.assertEqual([['fake-host', 'fake-node'],
                              ['fake-host', 'fake-node']],
                             filter_properties['retry']['hosts'])
        else:
            self.assertNotIn('retry', filter_properties)
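
The helper under test never appears on this page, so here is a minimal reconstruction of populate_filter_properties(), assuming only the behavior these tests assert; the real implementation lives in nova/scheduler/utils.py and handles more input types (e.g. the objects.Selection used in Example #15).

    def populate_filter_properties(filter_properties, host_state):
        """Copy the chosen host's identity and limits into filter_properties."""
        # Call sites pass either an object with attributes or a plain dict.
        if isinstance(host_state, dict):
            host = host_state['host']
            nodename = host_state['nodename']
            limits = host_state['limits']
        else:
            host = host_state.host
            nodename = host_state.nodename
            limits = host_state.limits

        # Record the selected host so a reschedule can avoid repeating it.
        retry = filter_properties.get('retry')
        if retry:
            retry['hosts'].append([host, nodename])

        # A forced host bypasses the scheduler, so its limits do not apply
        # (Example #25 asserts exactly this).
        if not filter_properties.get('force_hosts'):
            filter_properties['limits'] = limits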
Example #2
    def schedule_prep_resize(self, context, image, request_spec,
                             filter_properties, instance, instance_type,
                             reservations):
        """Select a target for resize.

        Selects a target host for the instance, post-resize, and casts
        the prep_resize operation to it.
        """

        weighed_hosts = self._schedule(context, request_spec,
                filter_properties, [instance['uuid']])
        if not weighed_hosts:
            raise exception.NoValidHost(reason="")
        weighed_host = weighed_hosts.pop(0)

        scheduler_utils.populate_filter_properties(filter_properties,
                weighed_host.obj)

        # context is not serializable
        filter_properties.pop('context', None)

        # Forward off to the host
        self.compute_rpcapi.prep_resize(context, image, instance,
                instance_type, weighed_host.obj.host, reservations,
                request_spec=request_spec, filter_properties=filter_properties,
                node=weighed_host.obj.nodename)
Example #3
    def _execute(self):
        self.quotas = objects.Quotas.from_reservations(self.context,
                                                       self.reservations,
                                                       instance=self.instance)
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, legacy_spec,
                                             legacy_props)
        scheduler_utils.populate_retry(legacy_props,
                                       self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec and
                self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        hosts = self.scheduler_client.select_destinations(
            self.context, self.request_spec)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(legacy_props,
                                                   host_state)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(
                self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        self.compute_rpcapi.prep_resize(
            self.context, self.instance, legacy_spec['image'],
            self.flavor, host, self.reservations,
            request_spec=legacy_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown)
Example #4
    def _provision_resource(self, context, weighed_host, request_spec,
            filter_properties, requested_networks, injected_files,
            admin_password, is_first_time, instance_uuid=None,
            legacy_bdm_in_spec=True):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec['instance_uuids'] = [instance_uuid]
        payload = dict(request_spec=request_spec,
                       weighted_host=weighed_host.to_dict(),
                       instance_id=instance_uuid)
        self.notifier.info(context,
                           'scheduler.run_instance.scheduled', payload)

        # Update the metadata if necessary
        try:
            updated_instance = driver.instance_update_db(context,
                                                         instance_uuid)
        except exception.InstanceNotFound:
            LOG.warning(_("Instance disappeared during scheduling"),
                        context=context, instance_uuid=instance_uuid)

        else:
            scheduler_utils.populate_filter_properties(filter_properties,
                    weighed_host.obj)

            self.compute_rpcapi.run_instance(context,
                    instance=updated_instance,
                    host=weighed_host.obj.host,
                    request_spec=request_spec,
                    filter_properties=filter_properties,
                    requested_networks=requested_networks,
                    injected_files=injected_files,
                    admin_password=admin_password, is_first_time=is_first_time,
                    node=weighed_host.obj.nodename,
                    legacy_bdm_in_spec=legacy_bdm_in_spec)
Example #5
    def _execute(self):
        image = self.request_spec.get("image")
        self.quotas = objects.Quotas.from_reservations(self.context, self.reservations, instance=self.instance)
        scheduler_utils.setup_instance_group(self.context, self.request_spec, self.filter_properties)
        scheduler_utils.populate_retry(self.filter_properties, self.instance.uuid)
        hosts = self.scheduler_client.select_destinations(self.context, self.request_spec, self.filter_properties)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(self.filter_properties, host_state)
        # context is not serializable
        self.filter_properties.pop("context", None)

        (host, node) = (host_state["host"], host_state["nodename"])
        self.compute_rpcapi.prep_resize(
            self.context,
            image,
            self.instance,
            self.flavor,
            host,
            self.reservations,
            request_spec=self.request_spec,
            filter_properties=self.filter_properties,
            node=node,
            clean_shutdown=self.clean_shutdown,
        )
Example #6
    def _provision_resource(self, context, weighed_host, request_spec,
            filter_properties, requested_networks, injected_files,
            admin_password, is_first_time, instance_uuid=None):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec['instance_uuids'] = [instance_uuid]
        payload = dict(request_spec=request_spec,
                       weighted_host=weighed_host.to_dict(),
                       instance_id=instance_uuid)
        notifier.notify(context, notifier.publisher_id("scheduler"),
                        'scheduler.run_instance.scheduled', notifier.INFO,
                        payload)

        # Update the metadata if necessary
        scheduler_hints = filter_properties.get('scheduler_hints') or {}
        group = scheduler_hints.get('group', None)
        values = None
        if group:
            values = request_spec['instance_properties']['system_metadata']
            values.update({'group': group})
            values = {'system_metadata': values}

        updated_instance = driver.instance_update_db(context,
                instance_uuid, extra_values=values)

        scheduler_utils.populate_filter_properties(filter_properties,
                weighed_host.obj)

        self.compute_rpcapi.run_instance(context, instance=updated_instance,
                host=weighed_host.obj.host,
                request_spec=request_spec, filter_properties=filter_properties,
                requested_networks=requested_networks,
                injected_files=injected_files,
                admin_password=admin_password, is_first_time=is_first_time,
                node=weighed_host.obj.nodename)
Example #7
    def _execute(self):
        image = self.request_spec.get('image')
        self.quotas = objects.Quotas.from_reservations(self.context,
                                                       self.reservations,
                                                       instance=self.instance)
        scheduler_utils.setup_instance_group(self.context, self.request_spec,
                                             self.filter_properties)
        scheduler_utils.populate_retry(self.filter_properties,
                                       self.instance.uuid)
        # TODO(sbauza): Hydrate here the object until we modify the
        # scheduler.utils methods to directly use the RequestSpec object
        spec_obj = objects.RequestSpec.from_primitives(
            self.context, self.request_spec, self.filter_properties)
        hosts = self.scheduler_client.select_destinations(
            self.context, spec_obj)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(self.filter_properties,
                                                   host_state)
        # context is not serializable
        self.filter_properties.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])
        self.compute_rpcapi.prep_resize(
            self.context, image, self.instance, self.flavor, host,
            self.reservations, request_spec=self.request_spec,
            filter_properties=self.filter_properties, node=node,
            clean_shutdown=self.clean_shutdown)
Example #8
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id="")

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image = None
            image_id = sys_meta.get("shelved_image_id")
            # No need to check for the image if image_id is None, as the
            # "shelved_image_id" key is not set for volume-backed instances
            # during the shelve process
            if image_id:
                with compute_utils.EventReporter(context, "get_image_info", instance.uuid):
                    try:
                        image = safe_image_show(context, image_id)
                    except exception.ImageNotFound:
                        instance.vm_state = vm_states.ERROR
                        instance.save()

                        reason = _("Unshelve attempted but the image %s " "cannot be found.") % image_id

                        LOG.error(reason, instance=instance)
                        raise exception.UnshelveException(instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, "schedule_instances", instance.uuid):
                    filter_properties = {}
                    scheduler_utils.populate_retry(filter_properties, instance.uuid)
                    request_spec = scheduler_utils.build_request_spec(context, image, [instance])
                    hosts = self._schedule_instances(context, request_spec, filter_properties)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(filter_properties, host_state)
                    (host, node) = (host_state["host"], host_state["nodename"])
                    self.compute_rpcapi.unshelve_instance(
                        context, instance, host, image=image, filter_properties=filter_properties, node=node
                    )
            except (exception.NoValidHost, exception.UnsupportedPolicyException):
                instance.task_state = None
                instance.save()
                LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance)
                return
            except Exception:
                with excutils.save_and_reraise_exception():
                    instance.task_state = None
                    instance.save()
                    LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance)
        else:
            LOG.error(_LE("Unshelve attempted but vm_state not SHELVED or " "SHELVED_OFFLOADED"), instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return
Example #9
    def _cold_migrate(self, context, instance, flavor, filter_properties,
                      reservations):
        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(
            context, self.image_api, image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(
            context, image, [instance], instance_type=flavor)

        quotas = objects.Quotas.from_reservations(context,
                                                  reservations,
                                                  instance=instance)
        scheduler_utils.setup_instance_group(context, request_spec,
                                             filter_properties)
        try:
            scheduler_utils.populate_retry(filter_properties, instance['uuid'])
            hosts = self.scheduler_client.select_destinations(
                    context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance['vm_state']
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            quotas.rollback()

            # if the flavor IDs match, it's migrate; otherwise resize
            if flavor['id'] == instance['instance_type_id']:
                msg = _("No valid host found for cold migrate")
            else:
                msg = _("No valid host found for resize")
            raise exception.NoValidHost(reason=msg)

        try:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)
            # context is not serializable
            filter_properties.pop('context', None)

            # TODO(timello): originally, instance_type in request_spec
            # on compute.api.resize does not have 'extra_specs', so we
            # remove it for now to keep tests backward compatibility.
            request_spec['instance_type'].pop('extra_specs', None)

            (host, node) = (host_state['host'], host_state['nodename'])
            self.compute_rpcapi.prep_resize(
                context, image, instance,
                flavor, host,
                reservations, request_spec=request_spec,
                filter_properties=filter_properties, node=node)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {'vm_state': instance['vm_state'],
                           'task_state': None}
                self._set_vm_state_and_notify(context, 'migrate_server',
                                              updates, ex, request_spec)
                quotas.rollback()
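
Several of these call sites pair populate_filter_properties() with populate_retry(). A rough sketch of that companion helper, inferred from how the examples use it: retry dicts carry 'hosts' and 'num_attempts' (see Example #15), and a single forced host disables retries (see Example #25). The real version in nova/scheduler/utils.py reads the attempt limit from nova.conf and raises a Nova-specific exception (MaxRetriesExceeded in later releases) when the budget is exhausted.

    def populate_retry(filter_properties, instance_uuid, max_attempts=3):
        # A single forced host/node leaves nothing to retry against.
        force_hosts = filter_properties.get('force_hosts', [])
        force_nodes = filter_properties.get('force_nodes', [])
        if max_attempts == 1 or len(force_hosts) == 1 or len(force_nodes) == 1:
            return

        retry = filter_properties.setdefault('retry',
                                             {'num_attempts': 0, 'hosts': []})
        retry['num_attempts'] += 1
        if retry['num_attempts'] > max_attempts:
            # Placeholder; the real code raises a nova.exception type here.
            raise RuntimeError('Exceeded max scheduling attempts %d for '
                               'instance %s' % (max_attempts, instance_uuid))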
Example #10
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id="")

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
            snapshot_id = sys_meta.get("shelved_image_id")
            if snapshot_id:
                self._delete_image(context, snapshot_id)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image_id = sys_meta.get("shelved_image_id")
            with compute_utils.EventReporter(context, "get_image_info", instance.uuid):
                try:
                    image = safe_image_show(context, image_id)
                except exception.ImageNotFound:
                    instance.vm_state = vm_states.ERROR
                    instance.save()

                    if image_id:
                        reason = _("Unshelve attempted but the image %s " "cannot be found.") % image_id
                    else:
                        reason = _("Unshelve attempted but the image_id is " "not provided")

                    LOG.error(reason, instance=instance)
                    raise exception.UnshelveException(instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, "schedule_instances", instance.uuid):
                    filter_properties = {}
                    hosts = self._schedule_instances(context, image, filter_properties, instance)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(filter_properties, host_state)
                    (host, node) = (host_state["host"], host_state["nodename"])
                    self.compute_rpcapi.unshelve_instance(
                        context, instance, host, image=image, filter_properties=filter_properties, node=node
                    )
            except exception.NoValidHost:
                instance.task_state = None
                instance.save()
                LOG.warning(_("No valid host found for unshelve instance"), instance=instance)
                return
        else:
            LOG.error(_LE("Unshelve attempted but vm_state not SHELVED or " "SHELVED_OFFLOADED"), instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return

        for key in ["shelved_at", "shelved_image_id", "shelved_host"]:
            if key in sys_meta:
                del (sys_meta[key])
        instance.system_metadata = sys_meta
        instance.save()
Example #11
    def _cold_migrate(self, context, instance, flavor, filter_properties, reservations, clean_shutdown):
        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor)

        quotas = objects.Quotas.from_reservations(context, reservations, instance=instance)
        try:
            scheduler_utils.setup_instance_group(context, request_spec, filter_properties)
            scheduler_utils.populate_retry(filter_properties, instance["uuid"])
            hosts = self.scheduler_client.select_destinations(context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance.vm_state
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {"vm_state": vm_state, "task_state": None}
            self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec)
            quotas.rollback()

            # if the flavor IDs match, it's migrate; otherwise resize
            if flavor["id"] == instance["instance_type_id"]:
                msg = _("No valid host found for cold migrate")
            else:
                msg = _("No valid host found for resize")
            raise exception.NoValidHost(reason=msg)
        except exception.UnsupportedPolicyException as ex:
            with excutils.save_and_reraise_exception():
                vm_state = instance.vm_state
                if not vm_state:
                    vm_state = vm_states.ACTIVE
                updates = {"vm_state": vm_state, "task_state": None}
                self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec)
                quotas.rollback()

        try:
            scheduler_utils.populate_filter_properties(filter_properties, host_state)
            # context is not serializable
            filter_properties.pop("context", None)

            (host, node) = (host_state["host"], host_state["nodename"])
            self.compute_rpcapi.prep_resize(
                context,
                image,
                instance,
                flavor,
                host,
                reservations,
                request_spec=request_spec,
                filter_properties=filter_properties,
                node=node,
                clean_shutdown=clean_shutdown,
            )
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {"vm_state": instance.vm_state, "task_state": None}
                self._set_vm_state_and_notify(context, instance.uuid, "migrate_server", updates, ex, request_spec)
                quotas.rollback()
Example #12
    def _test_populate_filter_props(self, host_state_obj=True,
                                    with_retry=True,
                                    force_hosts=None,
                                    force_nodes=None):
        if force_hosts is None:
            force_hosts = []
        if force_nodes is None:
            force_nodes = []
        if with_retry:
            if ((len(force_hosts) == 1 and len(force_nodes) <= 1)
                 or (len(force_nodes) == 1 and len(force_hosts) <= 1)):
                filter_properties = dict(force_hosts=force_hosts,
                                         force_nodes=force_nodes)
            elif len(force_hosts) > 1 or len(force_nodes) > 1:
                filter_properties = dict(retry=dict(hosts=[]),
                                         force_hosts=force_hosts,
                                         force_nodes=force_nodes)
            else:
                filter_properties = dict(retry=dict(hosts=[]))
        else:
            filter_properties = dict()

        if host_state_obj:
            class host_state(object):
                host = 'fake-host'
                nodename = 'fake-node'
                limits = 'fake-limits'
        else:
            host_state = dict(host='fake-host',
                              nodename='fake-node',
                              limits='fake-limits')

        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)

        enable_retry_force_hosts = not force_hosts or len(force_hosts) > 1
        enable_retry_force_nodes = not force_nodes or len(force_nodes) > 1
        if with_retry or enable_retry_force_hosts or enable_retry_force_nodes:
            # So we can check for 2 hosts
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)

        if force_hosts:
            expected_limits = None
        else:
            expected_limits = 'fake-limits'
        self.assertEqual(expected_limits,
                         filter_properties.get('limits'))

        if (with_retry and enable_retry_force_hosts
                       and enable_retry_force_nodes):
            self.assertEqual([['fake-host', 'fake-node'],
                              ['fake-host', 'fake-node']],
                             filter_properties['retry']['hosts'])
        else:
            self.assertNotIn('retry', filter_properties)
Example #13
    def build_instances(self, context, instances, image, filter_properties,
            admin_password, injected_files, requested_networks,
            security_groups, block_device_mapping=None, legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        request_spec = scheduler_utils.build_request_spec(context, image,
                                                          instances)
        scheduler_utils.setup_instance_group(context, request_spec,
                                             filter_properties)
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and
                not isinstance(requested_networks,
                               objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(
                objects=[objects.NetworkRequest.from_tuple(t)
                         for t in requested_networks])

        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,
                instances[0].uuid)
            hosts = self.scheduler_client.select_destinations(context,
                    request_spec, filter_properties)
        except Exception as exc:
            for instance in instances:
                scheduler_driver.handle_schedule_error(context, exc,
                        instance.uuid, request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)

            self.compute_rpcapi.build_and_run_instance(context,
                    instance=instance, host=host['host'], image=image,
                    request_spec=request_spec,
                    filter_properties=local_filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms, node=host['nodename'],
                    limits=host['limits'])
Example #14
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
            snapshot_id = sys_meta.get('shelved_image_id')
            if snapshot_id:
                self._delete_image(context, snapshot_id)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            try:
                with compute_utils.EventReporter(context, self.db,
                        'get_image_info', instance.uuid):
                    image = self._get_image(context,
                            sys_meta['shelved_image_id'])
            except exception.ImageNotFound:
                with excutils.save_and_reraise_exception():
                    LOG.error(_('Unshelve attempted but the image '
                                'cannot be found'), instance=instance)
                    instance.vm_state = vm_states.ERROR
                    instance.save()

            try:
                with compute_utils.EventReporter(context, self.db,
                                                 'schedule_instances',
                                                 instance.uuid):
                    filter_properties = {}
                    hosts = self._schedule_instances(context, image,
                                                     filter_properties,
                                                     instance)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                            filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                            context, instance, host, image=image,
                            filter_properties=filter_properties, node=node)
            except exception.NoValidHost as ex:
                instance.task_state = None
                instance.save()
                LOG.warning(_("No valid host found for unshelve instance"),
                            instance=instance)
                return
        else:
            LOG.error(_('Unshelve attempted but vm_state not SHELVED or '
                        'SHELVED_OFFLOADED'), instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return

        for key in ['shelved_at', 'shelved_image_id', 'shelved_host']:
            if key in sys_meta:
                del(sys_meta[key])
        instance.system_metadata = sys_meta
        instance.save()
Example #15
    def test_post_select_populate(self):
        # Test addition of certain filter props after a node is selected.
        retry = {'hosts': [], 'num_attempts': 1}
        filter_properties = {'retry': retry}

        selection = objects.Selection(service_host="host", nodename="node",
                cell_uuid=uuids.cell)
        scheduler_utils.populate_filter_properties(filter_properties,
                selection)
        self.assertEqual(['host', 'node'],
                         filter_properties['retry']['hosts'][0])
Example #16
    def _cold_migrate(self, context, instance, flavor, filter_properties,
                      reservations):
        image_ref = instance.image_ref
        if image_ref:
            image = self._get_image(context, image_ref)
        else:
            image = {}

        request_spec = scheduler_utils.build_request_spec(
            context, image, [instance], instance_type=flavor)

        try:
            hosts = self.scheduler_rpcapi.select_destinations(
                    context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance['vm_state']
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            if reservations:
                self.quotas.rollback(context, reservations)

            LOG.warning(_("No valid host found for cold migrate"))
            return

        try:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)
            # context is not serializable
            filter_properties.pop('context', None)

            # TODO(timello): originally, instance_type in request_spec
            # on compute.api.resize does not have 'extra_specs', so we
            # remove it for now to keep tests backward compatibility.
            request_spec['instance_type'].pop('extra_specs')

            (host, node) = (host_state['host'], host_state['nodename'])
            self.compute_rpcapi.prep_resize(
                context, image, instance,
                flavor, host,
                reservations, request_spec=request_spec,
                filter_properties=filter_properties, node=node)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {'vm_state': vm_states.ERROR,
                            'task_state': None}
                self._set_vm_state_and_notify(context, 'migrate_server',
                                              updates, ex, request_spec)
                if reservations:
                    self.quotas.rollback(context, reservations)
Example #17
    def test_post_select_populate(self):
        # Test addition of certain filter props after a node is selected.
        retry = {"hosts": [], "num_attempts": 1}
        filter_properties = {"retry": retry}

        host_state = host_manager.HostState("host", "node")
        host_state.limits["vcpus"] = 5
        scheduler_utils.populate_filter_properties(filter_properties, host_state)

        self.assertEqual(["host", "node"], filter_properties["retry"]["hosts"][0])

        self.assertEqual({"vcpus": 5}, host_state.limits)
Example #18
    def test_post_select_populate(self):
        # Test addition of certain filter props after a node is selected.
        retry = {'hosts': [], 'num_attempts': 1}
        filter_properties = {'retry': retry}

        host_state = host_manager.HostState('host', 'node')
        host_state.limits['vcpus'] = 5
        scheduler_utils.populate_filter_properties(filter_properties,
                host_state)

        self.assertEqual(['host', 'node'],
                         filter_properties['retry']['hosts'][0])

        self.assertEqual({'vcpus': 5}, host_state.limits)
Example #19
    def _execute(self):
        image = self.request_spec.image
        self.quotas = objects.Quotas.from_reservations(self.context,
                                                       self.reservations,
                                                       instance=self.instance)
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, legacy_spec,
                                             legacy_props)
        scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # TODO(sbauza): Remove that RequestSpec rehydration once
        # scheduler.utils methods use directly the NovaObject.
        self.request_spec = objects.RequestSpec.from_components(
            self.context, self.instance.uuid, image, self.flavor,
            self.instance.numa_topology, self.instance.pci_requests,
            legacy_props, None, self.instance.availability_zone)
        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()
        hosts = self.scheduler_client.select_destinations(
            self.context, self.request_spec)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(legacy_props, host_state)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        self.compute_rpcapi.prep_live_resize(
            self.context,
            self.instance,
            legacy_spec['image'],
            self.flavor,
            host,
            self.reservations,
            request_spec=legacy_spec,
            filter_properties=legacy_props,
            node=node,
            clean_shutdown=self.clean_shutdown)
Example #20
    def _provision_resource(
        self,
        context,
        weighed_host,
        request_spec,
        filter_properties,
        requested_networks,
        injected_files,
        admin_password,
        is_first_time,
        instance_uuid=None,
        legacy_bdm_in_spec=True,
    ):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec["instance_uuids"] = [instance_uuid]
        payload = dict(request_spec=request_spec, weighted_host=weighed_host.to_dict(), instance_id=instance_uuid)
        self.notifier.info(context, "scheduler.run_instance.scheduled", payload)

        # Update the metadata if necessary
        scheduler_hints = filter_properties.get("scheduler_hints") or {}
        group = scheduler_hints.get("group", None)
        values = None
        if group:
            values = request_spec["instance_properties"]["system_metadata"]
            values.update({"group": group})
            values = {"system_metadata": values}

        try:
            updated_instance = driver.instance_update_db(context, instance_uuid, extra_values=values)

        except exception.InstanceNotFound:
            LOG.warning(_("Instance disappeared during scheduling"), context=context, instance_uuid=instance_uuid)

        else:
            scheduler_utils.populate_filter_properties(filter_properties, weighed_host.obj)

            self.compute_rpcapi.run_instance(
                context,
                instance=updated_instance,
                host=weighed_host.obj.host,
                request_spec=request_spec,
                filter_properties=filter_properties,
                requested_networks=requested_networks,
                injected_files=injected_files,
                admin_password=admin_password,
                is_first_time=is_first_time,
                node=weighed_host.obj.nodename,
                legacy_bdm_in_spec=legacy_bdm_in_spec,
            )
Example #21
    def test_post_select_populate(self):
        # Test addition of certain filter props after a node is selected.
        retry = {'hosts': [], 'num_attempts': 1}
        filter_properties = {'retry': retry}

        host_state = host_manager.HostState('host', 'node')
        host_state.limits['vcpus'] = 5
        scheduler_utils.populate_filter_properties(filter_properties,
                host_state)

        self.assertEqual(['host', 'node'],
                         filter_properties['retry']['hosts'][0])

        self.assertEqual({'vcpus': 5}, host_state.limits)
Example #22
    def _cold_migrate(self, context, instance, flavor, filter_properties, reservations):
        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(context, self.image_api, image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(context, image, [instance], instance_type=flavor)

        quotas = quotas_obj.Quotas.from_reservations(context, reservations, instance=instance)
        try:
            scheduler_utils.populate_retry(filter_properties, instance["uuid"])
            hosts = self.scheduler_rpcapi.select_destinations(context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance["vm_state"]
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {"vm_state": vm_state, "task_state": None}
            self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec)
            quotas.rollback()

            msg = _("No valid host found for cold migrate")
            raise exception.NoValidHost(reason=msg)

        try:
            scheduler_utils.populate_filter_properties(filter_properties, host_state)
            # context is not serializable
            filter_properties.pop("context", None)

            # TODO(timello): originally, instance_type in request_spec
            # on compute.api.resize does not have 'extra_specs', so we
            # remove it for now to keep tests backward compatibility.
            request_spec["instance_type"].pop("extra_specs")

            (host, node) = (host_state["host"], host_state["nodename"])
            self.compute_rpcapi.prep_resize(
                context,
                image,
                instance,
                flavor,
                host,
                reservations,
                request_spec=request_spec,
                filter_properties=filter_properties,
                node=node,
            )
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {"vm_state": instance["vm_state"], "task_state": None}
                self._set_vm_state_and_notify(context, "migrate_server", updates, ex, request_spec)
                quotas.rollback()
Example #23
    def prep_resize(self, context, image, request_spec, filter_properties,
                    instance, instance_type, reservations):
        """Tries to call schedule_prep_resize on the driver.
        Sets instance vm_state to ACTIVE on NoValidHost
        Sets vm_state to ERROR on other exceptions
        """
        instance_uuid = instance['uuid']
        with compute_utils.EventReporter(context, conductor_api.LocalAPI(),
                                         'schedule', instance_uuid):
            try:
                request_spec['num_instances'] = len(
                    request_spec['instance_uuids'])
                hosts = self.driver.select_destinations(
                    context, request_spec, filter_properties)
                host_state = hosts[0]

                scheduler_utils.populate_filter_properties(
                    filter_properties, host_state)
                # context is not serializable
                filter_properties.pop('context', None)

                (host, node) = (host_state['host'], host_state['nodename'])
                self.compute_rpcapi.prep_resize(
                    context,
                    image,
                    instance,
                    instance_type,
                    host,
                    reservations,
                    request_spec=request_spec,
                    filter_properties=filter_properties,
                    node=node)

            except exception.NoValidHost as ex:
                vm_state = instance.get('vm_state', vm_states.ACTIVE)
                self._set_vm_state_and_notify('prep_resize', {
                    'vm_state': vm_state,
                    'task_state': None
                }, context, ex, request_spec)
                if reservations:
                    QUOTAS.rollback(context, reservations)
            except Exception as ex:
                with excutils.save_and_reraise_exception():
                    self._set_vm_state_and_notify('prep_resize', {
                        'vm_state': vm_states.ERROR,
                        'task_state': None
                    }, context, ex, request_spec)
                    if reservations:
                        QUOTAS.rollback(context, reservations)
Example #24
    def _cold_migrate(self, context, instance, flavor, filter_properties,
                      reservations):
        image_ref = instance.get('image_ref')
        if image_ref:
            image = self._get_image(context, image_ref)
        else:
            image = {}

        request_spec = scheduler_utils.build_request_spec(
            context, image, [instance])

        try:
            hosts = self.scheduler_rpcapi.select_destinations(
                    context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            updates = {'vm_state': vm_states.ACTIVE, 'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            if reservations:
                self.quotas.rollback(context, reservations)

            LOG.warning(_("No valid host found for cold migrate"))
            return

        try:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)
            # context is not serializable
            filter_properties.pop('context', None)

            # TODO(timello): originally, instance_type in request_spec
            # on compute.api.resize does not have 'extra_specs', so we
            # remove it for now to keep tests backward compatibility.
            request_spec['instance_type'].pop('extra_specs')

            (host, node) = (host_state['host'], host_state['nodename'])
            self.compute_rpcapi.prep_resize(
                context, image, instance, flavor, host,
                reservations, request_spec=request_spec,
                filter_properties=filter_properties, node=node)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {'vm_state': vm_states.ERROR,
                            'task_state': None}
                self._set_vm_state_and_notify(context, 'migrate_server',
                                              updates, ex, request_spec)
                if reservations:
                    self.quotas.rollback(context, reservations)
Example #25
    def _test_populate_filter_props(self,
                                    host_state_obj=True,
                                    with_retry=True,
                                    force_hosts=None,
                                    force_nodes=None):
        if force_hosts is None:
            force_hosts = []
        if force_nodes is None:
            force_nodes = []
        if with_retry:
            if not force_hosts and not force_nodes:
                filter_properties = dict(retry=dict(hosts=[]))
            else:
                filter_properties = dict(force_hosts=force_hosts,
                                         force_nodes=force_nodes)
        else:
            filter_properties = dict()

        if host_state_obj:

            class host_state(object):
                host = 'fake-host'
                nodename = 'fake-node'
                limits = 'fake-limits'
        else:
            host_state = dict(host='fake-host',
                              nodename='fake-node',
                              limits='fake-limits')

        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)
        if with_retry and not force_hosts and not force_nodes:
            # So we can check for 2 hosts
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)

        if force_hosts:
            expected_limits = None
        else:
            expected_limits = 'fake-limits'
        self.assertEqual(expected_limits, filter_properties.get('limits'))

        if with_retry and not force_hosts and not force_nodes:
            self.assertEqual(
                [['fake-host', 'fake-node'], ['fake-host', 'fake-node']],
                filter_properties['retry']['hosts'])
        else:
            self.assertNotIn('retry', filter_properties)
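
Taken together, Examples #12 and #25 pin down the force_hosts contract: a single forced host means the scheduler made no real choice, so neither limits nor retry tracking is recorded. A minimal demonstration, assuming only the behavior these tests assert (all names are stand-ins):

    props = dict(force_hosts=['only-host'], force_nodes=[])
    host_state = dict(host='fake-host', nodename='fake-node',
                      limits='fake-limits')

    scheduler_utils.populate_filter_properties(props, host_state)

    # No oversubscription limits are attached and no retry list is kept.
    assert props.get('limits') is None
    assert 'retry' not in props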
Example #26
    def prep_resize(self, context, image, request_spec, filter_properties,
                    instance, instance_type, reservations):
        """Tries to call schedule_prep_resize on the driver.
        Sets instance vm_state to ACTIVE on NoValidHost
        Sets vm_state to ERROR on other exceptions
        """
        instance_uuid = instance['uuid']
        with compute_utils.EventReporter(context, conductor_api.LocalAPI(),
                                         'schedule', instance_uuid):
            try:
                request_spec['num_instances'] = len(
                        request_spec['instance_uuids'])
                hosts = self.driver.select_destinations(
                        context, request_spec, filter_properties)
                host_state = hosts[0]

                scheduler_utils.populate_filter_properties(filter_properties,
                                                           host_state)
                # context is not serializable
                filter_properties.pop('context', None)

                (host, node) = (host_state['host'], host_state['nodename'])
                attrs = ['metadata', 'system_metadata', 'info_cache',
                         'security_groups']
                inst_obj = instance_obj.Instance._from_db_object(
                        context, instance_obj.Instance(), instance,
                        expected_attrs=attrs)
                self.compute_rpcapi.prep_resize(
                    context, image, inst_obj, instance_type, host,
                    reservations, request_spec=request_spec,
                    filter_properties=filter_properties, node=node)

            except exception.NoValidHost as ex:
                vm_state = instance.get('vm_state', vm_states.ACTIVE)
                self._set_vm_state_and_notify('prep_resize',
                                             {'vm_state': vm_state,
                                              'task_state': None},
                                             context, ex, request_spec)
                if reservations:
                    QUOTAS.rollback(context, reservations)
            except Exception as ex:
                with excutils.save_and_reraise_exception():
                    self._set_vm_state_and_notify('prep_resize',
                                                 {'vm_state': vm_states.ERROR,
                                                  'task_state': None},
                                                 context, ex, request_spec)
                    if reservations:
                        QUOTAS.rollback(context, reservations)
Example #27
    def _execute(self):
        image = self.request_spec.image
        self.quotas = objects.Quotas.from_reservations(self.context,
                                                       self.reservations,
                                                       instance=self.instance)
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, legacy_spec,
                                             legacy_props)
        scheduler_utils.populate_retry(legacy_props,
                                       self.instance.uuid)

        # TODO(sbauza): Remove that RequestSpec rehydration once
        # scheduler.utils methods use directly the NovaObject.
        self.request_spec = objects.RequestSpec.from_components(
            self.context, self.instance.uuid, image,
            self.flavor, self.instance.numa_topology,
            self.instance.pci_requests, legacy_props, None,
            self.instance.availability_zone)
        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()
        hosts = self.scheduler_client.select_destinations(
            self.context, self.request_spec)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(legacy_props,
                                                   host_state)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        self.compute_rpcapi.prep_resize(
            self.context, self.instance, legacy_spec['image'],
            self.flavor, host, self.reservations,
            request_spec=legacy_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown)
Example #28
    def _execute(self):
        image = self.request_spec.get('image')
        self.quotas = objects.Quotas.from_reservations(self.context,
                                                       self.reservations,
                                                       instance=self.instance)
        scheduler_utils.setup_instance_group(self.context, self.request_spec,
                                             self.filter_properties)
        scheduler_utils.populate_retry(self.filter_properties,
                                       self.instance.uuid)
        # TODO(sbauza): Hydrate here the object until we modify the
        # scheduler.utils methods to directly use the RequestSpec object
        spec_obj = objects.RequestSpec.from_primitives(
            self.context, self.request_spec, self.filter_properties)
        hosts = self.scheduler_client.select_destinations(
            self.context, spec_obj)
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(self.filter_properties,
                                                   host_state)
        # context is not serializable
        self.filter_properties.pop('context', None)

        # NOTE(jiahua): unlike the stock code, this variant ignores the
        # scheduler's choice of host: use the requested destination host if
        # one was given, otherwise fall back to the instance's current host.
        if self.filter_properties['destination_host_name']:
            host = node = self.filter_properties['destination_host_name']
        else:
            host = node = self.filter_properties['instance_host_name']
        self.compute_rpcapi.prep_resize(
            self.context, image, self.instance, self.flavor, host,
            self.reservations, request_spec=self.request_spec,
            filter_properties=self.filter_properties, node=node,
            clean_shutdown=self.clean_shutdown)
    def _provision_resource(self, context, selected_host, request_spec,
            filter_properties, requested_networks, injected_files,
            admin_password, is_first_time, instance_uuid=None):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec['instance_uuids'] = [instance_uuid]
        # NOTE(Yathi): Not using weights in the solver scheduler,
        # but leaving them in the payload
        payload = dict(request_spec=request_spec,
                       weighted_host=dict(host=selected_host.host, weight=1),
                       instance_id=instance_uuid)
        notifier.notify(context, notifier.publisher_id("scheduler"),
                        'scheduler.run_instance.scheduled', notifier.INFO,
                        payload)

        # Update the metadata if necessary
        scheduler_hints = filter_properties.get('scheduler_hints') or {}
        group = scheduler_hints.get('group', None)
        values = None
        if group:
            values = request_spec['instance_properties']['system_metadata']
            values.update({'group': group})
            values = {'system_metadata': values}

        try:
            updated_instance = driver.instance_update_db(context,
                    instance_uuid, extra_values=values)

        except exception.InstanceNotFound:
            LOG.warning(_("Instance disappeared during scheduling"),
                        context=context, instance_uuid=instance_uuid)

        else:
            scheduler_utils.populate_filter_properties(filter_properties,
                    selected_host)

            self.compute_rpcapi.run_instance(context,
                    instance=updated_instance,
                    host=selected_host.host,
                    request_spec=request_spec,
                    filter_properties=filter_properties,
                    requested_networks=requested_networks,
                    injected_files=injected_files,
                    admin_password=admin_password,
                    is_first_time=is_first_time,
                    node=selected_host.nodename)
Beispiel #30
    def build_instances(self, context, instances, image, filter_properties,
            admin_password, injected_files, requested_networks,
            security_groups, block_device_mapping=None, legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        request_spec = scheduler_utils.build_request_spec(context, image,
                                                          instances)
        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,
                instances[0].uuid)
            hosts = self.scheduler_rpcapi.select_destinations(context,
                    request_spec, filter_properties)
        except Exception as exc:
            for instance in instances:
                scheduler_driver.handle_schedule_error(context, exc,
                        instance.uuid, request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)

            self.compute_rpcapi.build_and_run_instance(context,
                    instance=instance, host=host['host'], image=image,
                    request_spec=request_spec,
                    filter_properties=local_filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms, node=host['nodename'],
                    limits=host['limits'])
    def _provision_resource(self,
                            context,
                            weighed_host,
                            request_spec,
                            filter_properties,
                            requested_networks,
                            injected_files,
                            admin_password,
                            is_first_time,
                            instance_uuid=None,
                            legacy_bdm_in_spec=True):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec['instance_uuids'] = [instance_uuid]
        payload = dict(request_spec=request_spec,
                       weighted_host=weighed_host.to_dict(),
                       instance_id=instance_uuid)
        self.notifier.info(context, 'scheduler.run_instance.scheduled',
                           payload)

        # Update the metadata if necessary
        scheduler_hints = filter_properties.get('scheduler_hints') or {}
        try:
            updated_instance = driver.instance_update_db(
                context, instance_uuid)
        except exception.InstanceNotFound:
            LOG.warning(_("Instance disappeared during scheduling"),
                        context=context,
                        instance_uuid=instance_uuid)

        else:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       weighed_host.obj)

            self.compute_rpcapi.run_instance(
                context,
                instance=updated_instance,
                host=weighed_host.obj.host,
                request_spec=request_spec,
                filter_properties=filter_properties,
                requested_networks=requested_networks,
                injected_files=injected_files,
                admin_password=admin_password,
                is_first_time=is_first_time,
                node=weighed_host.obj.nodename,
                legacy_bdm_in_spec=legacy_bdm_in_spec)
    def _provision_resource(self,
                            context,
                            weighed_host,
                            request_spec,
                            filter_properties,
                            requested_networks,
                            injected_files,
                            admin_password,
                            is_first_time,
                            instance_uuid=None):
        """Create the requested resource in this Zone."""
        # NOTE(vish): add our current instance back into the request spec
        request_spec['instance_uuids'] = [instance_uuid]
        payload = dict(request_spec=request_spec,
                       weighted_host=weighed_host.to_dict(),
                       instance_id=instance_uuid)
        notifier.notify(context, notifier.publisher_id("scheduler"),
                        'scheduler.run_instance.scheduled', notifier.INFO,
                        payload)

        # Update the metadata if necessary
        scheduler_hints = filter_properties.get('scheduler_hints') or {}
        group = scheduler_hints.get('group', None)
        values = None
        if group:
            values = request_spec['instance_properties']['system_metadata']
            values.update({'group': group})
            values = {'system_metadata': values}

        updated_instance = driver.instance_update_db(context,
                                                     instance_uuid,
                                                     extra_values=values)

        scheduler_utils.populate_filter_properties(filter_properties,
                                                   weighed_host.obj)

        self.compute_rpcapi.run_instance(context,
                                         instance=updated_instance,
                                         host=weighed_host.obj.host,
                                         request_spec=request_spec,
                                         filter_properties=filter_properties,
                                         requested_networks=requested_networks,
                                         injected_files=injected_files,
                                         admin_password=admin_password,
                                         is_first_time=is_first_time,
                                         node=weighed_host.obj.nodename)
Beispiel #33
    def _test_populate_filter_props(self, selection_obj=True,
                                    with_retry=True,
                                    force_hosts=None,
                                    force_nodes=None,
                                    no_limits=None):
        if force_hosts is None:
            force_hosts = []
        if force_nodes is None:
            force_nodes = []
        if with_retry:
            if ((len(force_hosts) == 1 and len(force_nodes) <= 1)
                    or (len(force_nodes) == 1 and len(force_hosts) <= 1)):
                filter_properties = dict(force_hosts=force_hosts,
                                         force_nodes=force_nodes)
            elif len(force_hosts) > 1 or len(force_nodes) > 1:
                filter_properties = dict(retry=dict(hosts=[]),
                                         force_hosts=force_hosts,
                                         force_nodes=force_nodes)
            else:
                filter_properties = dict(retry=dict(hosts=[]))
        else:
            filter_properties = dict()

        if no_limits:
            fake_limits = None
        else:
            fake_limits = objects.SchedulerLimits(vcpu=1, disk_gb=2,
                    memory_mb=3, numa_topology=None)
        selection = objects.Selection(service_host="fake-host",
                nodename="fake-node", limits=fake_limits)
        if not selection_obj:
            selection = selection.to_dict()
            fake_limits = fake_limits.to_dict() if fake_limits else None

        scheduler_utils.populate_filter_properties(filter_properties,
                                                   selection)

        enable_retry_force_hosts = not force_hosts or len(force_hosts) > 1
        enable_retry_force_nodes = not force_nodes or len(force_nodes) > 1
        if with_retry or enable_retry_force_hosts or enable_retry_force_nodes:
            # So we can check for 2 hosts
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       selection)

        if force_hosts:
            expected_limits = None
        elif no_limits:
            expected_limits = {}
        elif isinstance(fake_limits, objects.SchedulerLimits):
            expected_limits = fake_limits.to_dict()
        else:
            expected_limits = fake_limits
        self.assertEqual(expected_limits,
                         filter_properties.get('limits'))

        if (with_retry and enable_retry_force_hosts
                       and enable_retry_force_nodes):
            self.assertEqual([['fake-host', 'fake-node'],
                              ['fake-host', 'fake-node']],
                             filter_properties['retry']['hosts'])
        else:
            self.assertNotIn('retry', filter_properties)
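
Two behaviors this test pins down are easy to miss in the snippets: forcing a host suppresses the scheduler limits, and retry host tracking only happens when the scheduler is free to choose. A toy model under those assumed semantics (not the Nova code itself):

def apply_selection_sketch(props, host, node, limits, forced=False):
    # A forced host bypasses scheduling, so no limits apply to it.
    props['limits'] = None if forced else dict(limits or {})
    # Only track retry hosts when retry bookkeeping is present.
    if 'retry' in props:
        props['retry']['hosts'].append([host, node])

props = {'retry': {'hosts': []}}
for _ in range(2):  # two passes, as in the test
    apply_selection_sketch(props, 'fake-host', 'fake-node', {'vcpu': 1})
assert props['retry']['hosts'] == [['fake-host', 'fake-node'],
                                   ['fake-host', 'fake-node']]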
Beispiel #34
    def _execute(self):
        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props,
                                           self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # TODO(gibi): We need to make sure that the requested_resources field
        # is re calculated based on neutron ports.

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(
            self.context, self.request_spec, self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.query_client.select_destinations(
                    self.context, self.request_spec, [self.instance.uuid],
                    return_objects=True, return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                            elevated, self.reportclient, self.request_spec,
                            self.instance.uuid, alloc_req,
                            selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." %
                          {"instance_uuid": self.instance.uuid})
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(
                self.context, host))

        LOG.debug("Calling prep_resize with selected host: %s; "
                  "Selected node: %s; Alternates: %s", host, node,
                  self.host_list, instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context, self.instance, self.request_spec.image,
            self.flavor, host, migration,
            request_spec=self.request_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
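
The reschedule branch above is essentially a claim-until-success loop over pre-computed alternates. A reduced sketch of that control flow (claim() stands in for scheduler_utils.claim_resources; the selection shape is assumed):

import json

def pick_claimable_host(host_list, claim):
    """Pop alternates until one can be claimed; None if exhausted."""
    while host_list:
        selection = host_list.pop(0)
        alloc_req = selection.get('allocation_request')
        if not alloc_req:
            # Schedulers that bypass Placement have nothing to claim,
            # so assume the host is usable.
            return selection
        if claim(json.loads(alloc_req)):
            return selection
    return None

hosts = [{'host': 'alt-1', 'allocation_request': '{"a": 1}'},
         {'host': 'alt-2', 'allocation_request': None}]
chosen = pick_claimable_host(hosts, claim=lambda req: False)  # alt-1 fails
assert chosen['host'] == 'alt-2'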
Beispiel #35
    def _cold_migrate(self, context, instance, flavor, filter_properties,
                      reservations):
        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(context, self.image_api,
                                                 image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(context,
                                                          image, [instance],
                                                          instance_type=flavor)

        quotas = objects.Quotas.from_reservations(context,
                                                  reservations,
                                                  instance=instance)
        try:
            scheduler_utils.populate_retry(filter_properties, instance['uuid'])
            hosts = self.scheduler_client.select_destinations(
                context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance['vm_state']
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server', updates,
                                          ex, request_spec)
            quotas.rollback()

            # if the flavor IDs match, it's migrate; otherwise resize
            if flavor['id'] == instance['instance_type_id']:
                msg = _("No valid host found for cold migrate")
            else:
                msg = _("No valid host found for resize")
            raise exception.NoValidHost(reason=msg)

        try:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)
            # context is not serializable
            filter_properties.pop('context', None)

            # TODO(timello): originally, instance_type in request_spec
            # on compute.api.resize does not have 'extra_specs', so we
            # remove it for now to keep tests backward compatibility.
            request_spec['instance_type'].pop('extra_specs', None)

            (host, node) = (host_state['host'], host_state['nodename'])
            self.compute_rpcapi.prep_resize(
                context,
                image,
                instance,
                flavor,
                host,
                reservations,
                request_spec=request_spec,
                filter_properties=filter_properties,
                node=node)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {
                    'vm_state': instance['vm_state'],
                    'task_state': None
                }
                self._set_vm_state_and_notify(context, 'migrate_server',
                                              updates, ex, request_spec)
                quotas.rollback()
Beispiel #36
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id='')

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image = None
            image_id = sys_meta.get('shelved_image_id')
            # No need to check for an image if image_id is None, as the
            # "shelved_image_id" key is not set for volume-backed
            # instances during the shelve process
            if image_id:
                with compute_utils.EventReporter(context, 'get_image_info',
                                                 instance.uuid):
                    try:
                        image = safe_image_show(context, image_id)
                    except exception.ImageNotFound:
                        instance.vm_state = vm_states.ERROR
                        instance.save()

                        reason = _('Unshelve attempted but the image %s '
                                   'cannot be found.') % image_id

                        LOG.error(reason, instance=instance)
                        raise exception.UnshelveException(
                            instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, 'schedule_instances',
                                                 instance.uuid):
                    filter_properties = {}
                    scheduler_utils.populate_retry(filter_properties,
                                                   instance.uuid)
                    hosts = self._schedule_instances(context, image,
                                                     filter_properties,
                                                     instance)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                        filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                        context,
                        instance,
                        host,
                        image=image,
                        filter_properties=filter_properties,
                        node=node)
            except (exception.NoValidHost,
                    exception.UnsupportedPolicyException):
                instance.task_state = None
                instance.save()
                LOG.warning(_LW("No valid host found for unshelve instance"),
                            instance=instance)
                return
            except Exception:
                with excutils.save_and_reraise_exception():
                    instance.task_state = None
                    instance.save()
                    LOG.error(_LE("Unshelve attempted but an error "
                                  "has occurred"),
                              instance=instance)
        else:
            LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or '
                          'SHELVED_OFFLOADED'),
                      instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return
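
The unshelve path branches purely on vm_state: a SHELVED instance is still on its host and just gets powered on, while SHELVED_OFFLOADED must be scheduled to a new host first. A minimal dispatch sketch (state names mirror the vm_states constants used above):

SHELVED = 'shelved'
SHELVED_OFFLOADED = 'shelved_offloaded'

def unshelve_action(vm_state):
    if vm_state == SHELVED:
        return 'start_instance'          # still placed; power it back on
    if vm_state == SHELVED_OFFLOADED:
        return 'schedule_then_unshelve'  # needs a destination host first
    return 'error'                       # any other state is invalid

assert unshelve_action(SHELVED_OFFLOADED) == 'schedule_then_unshelve'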
Beispiel #37
    def _cold_migrate(self, context, instance, flavor, filter_properties,
                      reservations, clean_shutdown):
        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(context, self.image_api,
                                                 image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(context,
                                                          image, [instance],
                                                          instance_type=flavor)

        quotas = objects.Quotas.from_reservations(context,
                                                  reservations,
                                                  instance=instance)
        try:
            scheduler_utils.setup_instance_group(context, request_spec,
                                                 filter_properties)
            scheduler_utils.populate_retry(filter_properties, instance['uuid'])
            hosts = self.scheduler_client.select_destinations(
                context, request_spec, filter_properties)
            host_state = hosts[0]
        except exception.NoValidHost as ex:
            vm_state = instance.vm_state
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, instance.uuid,
                                          'migrate_server', updates, ex,
                                          request_spec)
            quotas.rollback()

            # if the flavor IDs match, it's migrate; otherwise resize
            if flavor['id'] == instance['instance_type_id']:
                msg = _("No valid host found for cold migrate")
            else:
                msg = _("No valid host found for resize")
            raise exception.NoValidHost(reason=msg)
        except exception.UnsupportedPolicyException as ex:
            with excutils.save_and_reraise_exception():
                vm_state = instance.vm_state
                if not vm_state:
                    vm_state = vm_states.ACTIVE
                updates = {'vm_state': vm_state, 'task_state': None}
                self._set_vm_state_and_notify(context, instance.uuid,
                                              'migrate_server', updates, ex,
                                              request_spec)
                quotas.rollback()

        try:
            scheduler_utils.populate_filter_properties(filter_properties,
                                                       host_state)
            # context is not serializable
            filter_properties.pop('context', None)

            (host, node) = (host_state['host'], host_state['nodename'])
            self.compute_rpcapi.prep_resize(
                context,
                image,
                instance,
                flavor,
                host,
                reservations,
                request_spec=request_spec,
                filter_properties=filter_properties,
                node=node,
                clean_shutdown=clean_shutdown)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                updates = {'vm_state': instance.vm_state, 'task_state': None}
                self._set_vm_state_and_notify(context, instance.uuid,
                                              'migrate_server', updates, ex,
                                              request_spec)
                quotas.rollback()
Beispiel #38
    def build_instances(self,
                        context,
                        instances,
                        image,
                        filter_properties,
                        admin_password,
                        injected_files,
                        requested_networks,
                        security_groups,
                        block_device_mapping=None,
                        legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and not isinstance(requested_networks,
                                                  objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(objects=[
                objects.NetworkRequest.from_tuple(t)
                for t in requested_networks
            ])
        # TODO(melwitt): Remove this in version 2.0 of the RPC API
        flavor = filter_properties.get('instance_type')
        if flavor and not isinstance(flavor, objects.Flavor):
            # Code downstream may expect extra_specs to be populated since it
            # is receiving an object, so lookup the flavor to ensure this.
            flavor = objects.Flavor.get_by_id(context, flavor['id'])
            filter_properties = dict(filter_properties, instance_type=flavor)

        request_spec = {}
        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,
                                           instances[0].uuid)
            request_spec = scheduler_utils.build_request_spec(
                context, image, instances)
            hosts = self._schedule_instances(context, request_spec,
                                             filter_properties)
        except Exception as exc:
            updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            for instance in instances:
                self._set_vm_state_and_notify(context, instance.uuid,
                                              'build_instances', updates, exc,
                                              request_spec)
                self._cleanup_allocated_networks(context, instance,
                                                 requested_networks)
            return

        host_hypervisor = ''
        hosts_info = []
        reselect_flag = self.need_select_image(request_spec)
        if reselect_flag:
            # A normal user needs elevated privileges to query the DB
            elevated = context.elevated()
            hosts_info = db.compute_node_get_all(elevated)
            LOG.debug("hosts_info: {0}".format(hosts_info))

        for (instance, host) in six.moves.zip(instances, hosts):
            if reselect_flag:
                for hi in hosts_info:
                    if hi.get('service') and hi['service'].get(
                            'host') == host['host']:
                        host_hypervisor = hi.get('hypervisor_type')
                        LOG.debug(
                            'host_hypervisor: {0}'.format(host_hypervisor))
                        break

                image, instance, request_spec = self.select_image(
                    context, image, host_hypervisor, instance, request_spec,
                    filter_properties)
                LOG.debug("Final image: {0}".format(image.get('id')))

            try:
                instance.save()
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                                                       host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)

            self.compute_rpcapi.build_and_run_instance(
                context,
                instance=instance,
                host=host['host'],
                image=image,
                request_spec=request_spec,
                filter_properties=local_filter_props,
                admin_password=admin_password,
                injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=security_groups,
                block_device_mapping=bdms,
                node=host['nodename'],
                limits=host['limits'])
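
The reselect branch above resolves the scheduled host name to its compute node record just to read hypervisor_type. A toy version of that lookup (record shape assumed to match the snippet's compute_node rows):

def hypervisor_for_host(hosts_info, host_name):
    for info in hosts_info:
        service = info.get('service') or {}
        if service.get('host') == host_name:
            return info.get('hypervisor_type')
    return None  # host not found among the compute nodes

nodes = [{'service': {'host': 'cn-1'}, 'hypervisor_type': 'QEMU'},
         {'service': {'host': 'cn-2'}, 'hypervisor_type': 'ironic'}]
assert hypervisor_for_host(nodes, 'cn-2') == 'ironic'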
Beispiel #39
    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs, image, admin_password,
                                     injected_files, requested_networks,
                                     block_device_mapping):
        legacy_spec = request_specs[0].to_legacy_request_spec_dict()
        try:
            hosts = self._schedule_instances(
                context, legacy_spec,
                request_specs[0].to_legacy_filter_properties_dict())
        except Exception as exc:
            LOG.exception(_LE('Failed to schedule instances'))
            self._bury_in_cell0(context,
                                request_specs[0],
                                exc,
                                build_requests=build_requests)
            return

        host_mapping_cache = {}

        for (build_request, request_spec,
             host) in six.moves.zip(build_requests, request_specs, hosts):
            filter_props = request_spec.to_legacy_filter_properties_dict()
            scheduler_utils.populate_filter_properties(filter_props, host)
            instance = build_request.get_new_instance(context)

            # Convert host from the scheduler into a cell record
            if host['host'] not in host_mapping_cache:
                try:
                    host_mapping = objects.HostMapping.get_by_host(
                        context, host['host'])
                    host_mapping_cache[host['host']] = host_mapping
                except exception.HostMappingNotFound as exc:
                    LOG.error(
                        _LE('No host-to-cell mapping found for selected '
                            'host %(host)s. Setup is incomplete.'),
                        {'host': host['host']})
                    self._bury_in_cell0(context,
                                        request_spec,
                                        exc,
                                        build_requests=[build_request],
                                        instances=[instance])
                    continue
            else:
                host_mapping = host_mapping_cache[host['host']]

            cell = host_mapping.cell_mapping

            with obj_target_cell(instance, cell):
                instance.create()

            # send a state update notification for the initial create to
            # show it going from non-existent to BUILDING
            notifications.send_update_with_states(context,
                                                  instance,
                                                  None,
                                                  vm_states.BUILDING,
                                                  None,
                                                  None,
                                                  service="conductor")

            objects.InstanceAction.action_start(context,
                                                instance.uuid,
                                                instance_actions.CREATE,
                                                want_result=False)

            with obj_target_cell(instance, cell):
                instance_bdms = self._create_block_device_mapping(
                    instance.flavor, instance.uuid, block_device_mapping)

            # Update mapping for instance. Normally this check is guarded by
            # a try/except but if we're here we know that a newer nova-api
            # handled the build process and would have created the mapping
            inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
                context, instance.uuid)
            inst_mapping.cell_mapping = cell
            inst_mapping.save()

            try:
                build_request.destroy()
            except exception.BuildRequestNotFound:
                # This indicates an instance deletion request has been
                # processed, and the build should halt here. Clean up the
                # bdm and instance record.
                with obj_target_cell(instance, cell):
                    try:
                        instance.destroy()
                    except exception.InstanceNotFound:
                        pass
                    except exception.ObjectActionError:
                        # NOTE(melwitt): Instance became scheduled during
                        # the destroy, "host changed". Refresh and re-destroy.
                        try:
                            instance.refresh()
                            instance.destroy()
                        except exception.InstanceNotFound:
                            pass
                for bdm in instance_bdms:
                    with obj_target_cell(bdm, cell):
                        try:
                            bdm.destroy()
                        except exception.ObjectActionError:
                            pass
                return

            # NOTE(danms): Compute RPC expects security group names or ids
            # not objects, so convert this to a list of names until we can
            # pass the objects.
            legacy_secgroups = [
                s.identifier for s in request_spec.security_groups
            ]

            with obj_target_cell(instance, cell):
                self.compute_rpcapi.build_and_run_instance(
                    context,
                    instance=instance,
                    image=image,
                    request_spec=request_spec,
                    filter_properties=filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=legacy_secgroups,
                    block_device_mapping=instance_bdms,
                    host=host['host'],
                    node=host['nodename'],
                    limits=host['limits'])
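
host_mapping_cache above is a per-batch cache-aside memo: one HostMapping DB fetch per distinct host, reused for every instance landing on that host. A generic sketch of the pattern (lookup() is a placeholder for objects.HostMapping.get_by_host):

def mapping_for_host(cache, host, lookup):
    # Fetch once per host; later instances reuse the cached record.
    if host not in cache:
        cache[host] = lookup(host)  # may raise HostMappingNotFound
    return cache[host]

calls = []
def lookup(host):
    calls.append(host)
    return {'cell': 'cell1'}

cache = {}
for _ in range(3):  # three instances scheduled to the same host
    mapping_for_host(cache, 'compute-1', lookup)
assert calls == ['compute-1']  # a single DB round trip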
Beispiel #40
    def _execute(self):
        # TODO(sbauza): Remove this once prep_resize() accepts a RequestSpec
        # object in the signature, and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        migration = self._preallocate_migration()

        hosts = self.scheduler_client.select_destinations(
            self.context, self.request_spec, [self.instance.uuid])
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(legacy_props, host_state)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        migration,
                                        self.reservations,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown)
Beispiel #41
    def build_instances(self,
                        context,
                        instances,
                        image,
                        filter_properties,
                        admin_password,
                        injected_files,
                        requested_networks,
                        security_groups,
                        block_device_mapping=None,
                        legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        request_spec = scheduler_utils.build_request_spec(
            context, image, instances)
        # NOTE(sbauza): filter_properties['scheduler_hints'] can be None
        hints = filter_properties.get('scheduler_hints', {}) or {}
        group_hint = hints.get('group')
        group_hosts = filter_properties.get('group_hosts')
        group_info = scheduler_utils.setup_instance_group(
            context, group_hint, group_hosts)
        if isinstance(group_info, tuple):
            filter_properties['group_updated'] = True
            (filter_properties['group_hosts'],
             filter_properties['group_policies']) = group_info
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and not isinstance(requested_networks,
                                                  objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(objects=[
                objects.NetworkRequest.from_tuple(t)
                for t in requested_networks
            ])

        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            # (luzhq) Validate the retry policy.
            # Update the retry properties in filter_properties. If this is a
            # rescheduled deployment, also check whether the current retry
            # count exceeds the maximum. Note that instances[0] is used here
            # because on a retry only a single instance is being retried.
            scheduler_utils.populate_retry(filter_properties,
                                           instances[0].uuid)
            hosts = self.scheduler_client.select_destinations(
                context, request_spec, filter_properties)
        except Exception as exc:
            for instance in instances:
                scheduler_driver.handle_schedule_error(context, exc,
                                                       instance.uuid,
                                                       request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                                                       host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)

            # (luzhq) self.compute_rpcapi = compute_rpcapi.ComputeAPI()
            self.compute_rpcapi.build_and_run_instance(
                context,
                instance=instance,
                host=host['host'],
                image=image,
                request_spec=request_spec,
                filter_properties=local_filter_props,
                admin_password=admin_password,
                injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=security_groups,
                block_device_mapping=bdms,
                node=host['nodename'],
                limits=host['limits'])
Beispiel #42
    def unshelve_instance(self, context, instance, request_spec=None):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id='')

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image = None
            image_id = sys_meta.get('shelved_image_id')
            # No need to check for an image if image_id is None, as the
            # "shelved_image_id" key is not set for volume-backed
            # instances during the shelve process
            if image_id:
                with compute_utils.EventReporter(
                    context, 'get_image_info', instance.uuid):
                    try:
                        image = safe_image_show(context, image_id)
                    except exception.ImageNotFound:
                        instance.vm_state = vm_states.ERROR
                        instance.save()

                        reason = _('Unshelve attempted but the image %s '
                                   'cannot be found.') % image_id

                        LOG.error(reason, instance=instance)
                        raise exception.UnshelveException(
                            instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, 'schedule_instances',
                                                 instance.uuid):
                    if not request_spec:
                        # NOTE(sbauza): We were unable to find an original
                        # RequestSpec object - probably because the instance is
                        # old. We need to mock that the old way
                        filter_properties = {}
                        request_spec = scheduler_utils.build_request_spec(
                            context, image, [instance])
                    else:
                        # NOTE(sbauza): Force_hosts/nodes needs to be reset
                        # if we want to make sure that the next destination
                        # is not forced to be the original host
                        request_spec.reset_forced_destinations()
                        # TODO(sbauza): Provide directly the RequestSpec object
                        # when _schedule_instances(),
                        # populate_filter_properties and populate_retry()
                        # accept it
                        filter_properties = request_spec.\
                            to_legacy_filter_properties_dict()
                        request_spec = request_spec.\
                            to_legacy_request_spec_dict()
                    scheduler_utils.populate_retry(filter_properties,
                                                   instance.uuid)
                    hosts = self._schedule_instances(
                            context, request_spec, filter_properties)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                            filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                            context, instance, host, image=image,
                            filter_properties=filter_properties, node=node)
            except (exception.NoValidHost,
                    exception.UnsupportedPolicyException):
                instance.task_state = None
                instance.save()
                LOG.warning(_LW("No valid host found for unshelve instance"),
                            instance=instance)
                return
            except Exception:
                with excutils.save_and_reraise_exception():
                    instance.task_state = None
                    instance.save()
                    LOG.error(_LE("Unshelve attempted but an error "
                                  "has occurred"), instance=instance)
        else:
            LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or '
                          'SHELVED_OFFLOADED'), instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return
Beispiel #43
    def _execute(self):
        # TODO(sbauza): Remove this once prep_resize() accepts a RequestSpec
        # object in the signature, and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): In the case that the target host is specified,
            # if the migration is failed, it is not necessary to retry
            # the cold migration to the same host. So make sure that
            # reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        migration = self._preallocate_migration()
        self.request_spec.ensure_project_id(self.instance)
        # For now, don't request alternates. A later patch in the series will
        # modify migration to use alternates instead of calling the scheduler
        # again.
        selection_lists = self.scheduler_client.select_destinations(
            self.context,
            self.request_spec, [self.instance.uuid],
            return_objects=True,
            return_alternates=False)
        # We only need the first item in the first list, as there is only one
        # instance, and we don't care about any alternates.
        selection = selection_lists[0][0]

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        migration,
                                        self.reservations,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown)
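
The takashin note above encodes a small but important rule: once the destination host is pinned, rescheduling to the same host is pointless, so the retry state is dropped on both the legacy properties and the RequestSpec. A toy illustration of that rule (dict stand-ins for the real objects):

def pin_destination_sketch(request_spec, legacy_props, host=None):
    if host:
        request_spec['requested_destination'] = {'host': host}
        # With a pinned host there is nothing to reschedule to, so
        # drop the retry bookkeeping entirely.
        legacy_props.pop('retry', None)
        request_spec['retry'] = None
    return request_spec, legacy_props

spec, props = pin_destination_sketch({}, {'retry': {'hosts': []}},
                                     host='dst-1')
assert 'retry' not in props and spec['retry'] is None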
Beispiel #44
    def build_instances(self, context, instances, image, filter_properties,
            admin_password, injected_files, requested_networks,
            security_groups, block_device_mapping=None, legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and
                not isinstance(requested_networks,
                               objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList.from_tuples(
                requested_networks)
        # TODO(melwitt): Remove this in version 2.0 of the RPC API
        flavor = filter_properties.get('instance_type')
        if flavor and not isinstance(flavor, objects.Flavor):
            # Code downstream may expect extra_specs to be populated since it
            # is receiving an object, so lookup the flavor to ensure this.
            flavor = objects.Flavor.get_by_id(context, flavor['id'])
            filter_properties = dict(filter_properties, instance_type=flavor)

        request_spec = {}
        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            request_spec = scheduler_utils.build_request_spec(
                context, image, instances)
            scheduler_utils.populate_retry(
                filter_properties, instances[0].uuid)
            hosts = self._schedule_instances(
                    context, request_spec, filter_properties)
        except Exception as exc:
            updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            for instance in instances:
                self._set_vm_state_and_notify(
                    context, instance.uuid, 'build_instances', updates,
                    exc, request_spec)
                try:
                    # If the BuildRequest stays around then instance show/lists
                    # will pull from it rather than the errored instance.
                    self._destroy_build_request(context, instance)
                except exception.BuildRequestNotFound:
                    pass
                self._cleanup_allocated_networks(
                    context, instance, requested_networks)
            return

        for (instance, host) in six.moves.zip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)

            # This is populated in scheduler_utils.populate_retry
            num_attempts = local_filter_props.get('retry',
                                                  {}).get('num_attempts', 1)
            if num_attempts <= 1:
                # If this is a reschedule the instance is already mapped to
                # this cell and the BuildRequest is already deleted so ignore
                # the logic below.
                inst_mapping = self._populate_instance_mapping(context,
                                                               instance,
                                                               host)
                try:
                    self._destroy_build_request(context, instance)
                except exception.BuildRequestNotFound:
                    # This indicates an instance delete has been requested in
                    # the API. Stop the build, cleanup the instance_mapping and
                    # potentially the block_device_mappings
                    # TODO(alaski): Handle block_device_mapping cleanup
                    if inst_mapping:
                        inst_mapping.destroy()
                    return

            self.compute_rpcapi.build_and_run_instance(context,
                    instance=instance, host=host['host'], image=image,
                    request_spec=request_spec,
                    filter_properties=local_filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms, node=host['nodename'],
                    limits=host['limits'])
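
# A standalone sketch of the retry bookkeeping the num_attempts check above
# relies on: populate_retry() is assumed to keep a dict of the form
# {'retry': {'num_attempts': N, 'hosts': [...]}} inside filter_properties,
# so a missing key or N == 1 means a first build rather than a reschedule.
# Toy helper for illustration only.
def is_first_attempt(filter_properties):
    retry = filter_properties.get('retry', {})
    return retry.get('num_attempts', 1) <= 1

assert is_first_attempt({})                                  # no retry info
assert is_first_attempt({'retry': {'num_attempts': 1}})      # first build
assert not is_first_attempt({'retry': {'num_attempts': 2}})  # reschedule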
Beispiel #45
    def _execute(self):
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): When the target host is specified and the
            # migration fails, there is no point retrying the cold migration
            # on the same host, so make sure a reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.scheduler_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        migration,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown,
                                        host_list=self.host_list)
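
# A simplified, standalone sketch of the reschedule branch above: walk the
# alternates in order, claim resources where an allocation_request exists,
# and fail once the list is exhausted. claim_fn stands in for
# scheduler_utils.claim_resources and RuntimeError for MaxRetriesExceeded;
# everything here is toy code, not nova's implementation.
def pick_first_claimable(host_list, claim_fn):
    while host_list:
        selection = host_list.pop(0)
        if not getattr(selection, 'allocation_request', None):
            # No Placement allocation to claim (non-Placement scheduler):
            # assume the host is valid, as the example above does.
            return selection
        if claim_fn(selection):
            return selection
    raise RuntimeError('Exhausted all hosts available for retrying')

class _Sel(object):
    def __init__(self, name, alloc):
        self.name, self.allocation_request = name, alloc

_hosts = [_Sel('h1', '{"allocations": {}}'), _Sel('h2', None)]
# The claim on h1 fails, so h2 (no Placement allocation) is accepted as-is.
assert pick_first_claimable(_hosts, lambda s: False).name == 'h2'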
Beispiel #47
    def unshelve_instance(self, context, instance, request_spec=None):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id='')

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image = None
            image_id = sys_meta.get('shelved_image_id')
            # No need to look up the image if image_id is None, as the
            # "shelved_image_id" key is not set for volume-backed
            # instances during the shelve process
            if image_id:
                with compute_utils.EventReporter(context, 'get_image_info',
                                                 instance.uuid):
                    try:
                        image = safe_image_show(context, image_id)
                    except exception.ImageNotFound:
                        instance.vm_state = vm_states.ERROR
                        instance.save()

                        reason = _('Unshelve attempted but the image %s '
                                   'cannot be found.') % image_id

                        LOG.error(reason, instance=instance)
                        raise exception.UnshelveException(
                            instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, 'schedule_instances',
                                                 instance.uuid):
                    if not request_spec:
                        # NOTE(sbauza): We were unable to find an original
                        # RequestSpec object - probably because the instance is
                        # old. We need to mock that the old way
                        filter_properties = {}
                        request_spec = scheduler_utils.build_request_spec(
                            context, image, [instance])
                    else:
                        # NOTE(sbauza): Force_hosts/nodes needs to be reset
                        # if we want to make sure that the next destination
                        # is not forced to be the original host
                        request_spec.reset_forced_destinations()
                        # TODO(sbauza): Provide directly the RequestSpec object
                        # when _schedule_instances(),
                        # populate_filter_properties and populate_retry()
                        # accept it
                        filter_properties = request_spec.\
                            to_legacy_filter_properties_dict()
                        request_spec = request_spec.\
                            to_legacy_request_spec_dict()
                    scheduler_utils.populate_retry(filter_properties,
                                                   instance.uuid)
                    hosts = self._schedule_instances(context, request_spec,
                                                     filter_properties)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                        filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                        context,
                        instance,
                        host,
                        image=image,
                        filter_properties=filter_properties,
                        node=node)
            except (exception.NoValidHost,
                    exception.UnsupportedPolicyException):
                instance.task_state = None
                instance.save()
                LOG.warning(_LW("No valid host found for unshelve instance"),
                            instance=instance)
                return
            except Exception:
                with excutils.save_and_reraise_exception():
                    instance.task_state = None
                    instance.save()
                    LOG.error(_LE("Unshelve attempted but an error "
                                  "has occurred"),
                              instance=instance)
        else:
            LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or '
                          'SHELVED_OFFLOADED'),
                      instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return
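
# A hedged sketch of the compatibility branch above: instances created before
# RequestSpec objects were persisted have no spec to load, so the legacy dict
# is rebuilt from scratch; newer instances convert the object into the two
# legacy structures the scheduler utils still expect. build_legacy_spec is a
# hypothetical stand-in for scheduler_utils.build_request_spec.
def to_legacy(request_spec, build_legacy_spec):
    if request_spec is None:
        # No stored spec (old instance): mock the old-style inputs.
        return build_legacy_spec(), {}
    return (request_spec.to_legacy_request_spec_dict(),
            request_spec.to_legacy_filter_properties_dict())

class _Spec(object):
    def to_legacy_request_spec_dict(self):
        return {'image': {}}
    def to_legacy_filter_properties_dict(self):
        return {'scheduler_hints': {}}

assert to_legacy(None, dict) == ({}, {})
assert to_legacy(_Spec(), dict)[1] == {'scheduler_hints': {}}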
Beispiel #48
    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs, image,
                                     admin_password, injected_files,
                                     requested_networks, block_device_mapping):
        legacy_spec = request_specs[0].to_legacy_request_spec_dict()
        try:
            hosts = self._schedule_instances(context, legacy_spec,
                        request_specs[0].to_legacy_filter_properties_dict())
        except Exception as exc:
            LOG.exception(_LE('Failed to schedule instances'))
            self._bury_in_cell0(context, request_specs[0], exc,
                                build_requests=build_requests)
            return

        host_mapping_cache = {}

        for (build_request, request_spec, host) in six.moves.zip(
                build_requests, request_specs, hosts):
            filter_props = request_spec.to_legacy_filter_properties_dict()
            instance = build_request.get_new_instance(context)
            scheduler_utils.populate_retry(filter_props, instance.uuid)
            scheduler_utils.populate_filter_properties(filter_props,
                                                       host)

            # Convert host from the scheduler into a cell record
            if host['host'] not in host_mapping_cache:
                try:
                    host_mapping = objects.HostMapping.get_by_host(
                        context, host['host'])
                    host_mapping_cache[host['host']] = host_mapping
                except exception.HostMappingNotFound as exc:
                    LOG.error(_LE('No host-to-cell mapping found for selected '
                                  'host %(host)s. Setup is incomplete.'),
                              {'host': host['host']})
                    self._bury_in_cell0(context, request_spec, exc,
                                        build_requests=[build_request],
                                        instances=[instance])
                    continue
            else:
                host_mapping = host_mapping_cache[host['host']]

            cell = host_mapping.cell_mapping

            # Before we create the instance, let's make one final check that
            # the build request is still around and wasn't deleted by the user
            # already.
            try:
                objects.BuildRequest.get_by_instance_uuid(
                    context, instance.uuid)
            except exception.BuildRequestNotFound:
                # the build request is gone so we're done for this instance
                LOG.debug('While scheduling instance, the build request '
                          'was already deleted.', instance=instance)
                continue
            else:
                with obj_target_cell(instance, cell):
                    instance.create()

            # send a state update notification for the initial create to
            # show it going from non-existent to BUILDING
            notifications.send_update_with_states(context, instance, None,
                    vm_states.BUILDING, None, None, service="conductor")

            with obj_target_cell(instance, cell):
                objects.InstanceAction.action_start(
                    context, instance.uuid, instance_actions.CREATE,
                    want_result=False)
                instance_bdms = self._create_block_device_mapping(
                    instance.flavor, instance.uuid, block_device_mapping)

            # Update mapping for instance. Normally this check is guarded by
            # a try/except but if we're here we know that a newer nova-api
            # handled the build process and would have created the mapping
            inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
                context, instance.uuid)
            inst_mapping.cell_mapping = cell
            inst_mapping.save()

            if not self._delete_build_request(
                    context, build_request, instance, cell, instance_bdms):
                # The build request was deleted before/during scheduling so
                # the instance is gone and we don't have anything to build for
                # this one.
                continue

            # NOTE(danms): Compute RPC expects security group names or ids
            # not objects, so convert this to a list of names until we can
            # pass the objects.
            legacy_secgroups = [s.identifier
                                for s in request_spec.security_groups]

            with obj_target_cell(instance, cell):
                self.compute_rpcapi.build_and_run_instance(
                    context, instance=instance, image=image,
                    request_spec=request_spec,
                    filter_properties=filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=legacy_secgroups,
                    block_device_mapping=instance_bdms,
                    host=host['host'], node=host['nodename'],
                    limits=host['limits'])
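
# A standalone sketch of the host_mapping_cache pattern above: several
# instances in one request often land on the same host, so the host-to-cell
# lookup is memoized per call instead of querying the database repeatedly.
# lookup() is a hypothetical stand-in for objects.HostMapping.get_by_host().
def get_mapping_cached(cache, host, lookup):
    if host not in cache:
        cache[host] = lookup(host)
    return cache[host]

_calls = []
def _lookup(host):
    _calls.append(host)
    return 'cell-for-%s' % host

_cache = {}
get_mapping_cached(_cache, 'compute1', _lookup)
get_mapping_cached(_cache, 'compute1', _lookup)
assert _calls == ['compute1']  # the second call is served from the cache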
Beispiel #49
    def build_instances(self, context, instances, image, filter_properties,
            admin_password, injected_files, requested_networks,
            security_groups, block_device_mapping=None, legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        request_spec = scheduler_utils.build_request_spec(context, image,
                                                          instances)
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and
                not isinstance(requested_networks,
                               objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(
                objects=[objects.NetworkRequest.from_tuple(t)
                         for t in requested_networks])
        # TODO(melwitt): Remove this in version 2.0 of the RPC API
        flavor = filter_properties.get('instance_type')
        if flavor and not isinstance(flavor, objects.Flavor):
            # Code downstream may expect extra_specs to be populated since it
            # is receiving an object, so lookup the flavor to ensure this.
            flavor = objects.Flavor.get_by_id(context, flavor['id'])
            filter_properties = dict(filter_properties, instance_type=flavor)

        try:
            scheduler_utils.setup_instance_group(context, request_spec,
                                                 filter_properties)
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,
                instances[0].uuid)
            hosts = self.scheduler_client.select_destinations(context,
                    request_spec, filter_properties)
        except Exception as exc:
            updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            for instance in instances:
                self._set_vm_state_and_notify(
                    context, instance.uuid, 'build_instances', updates,
                    exc, request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)

            self.compute_rpcapi.build_and_run_instance(context,
                    instance=instance, host=host['host'], image=image,
                    request_spec=request_spec,
                    filter_properties=local_filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms, node=host['nodename'],
                    limits=host['limits'])
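
# The two NetworkRequestList conversions seen in these examples are assumed
# equivalent: from_tuples(tuples) in the newer variants is shorthand for
# building the list from NetworkRequest.from_tuple() items, as done inline
# above. These toy classes only mirror that shape; the real ones live in
# nova.objects.
class _NetworkRequest(object):
    def __init__(self, data):
        self.data = data

    @classmethod
    def from_tuple(cls, t):
        return cls(t)

class _NetworkRequestList(object):
    def __init__(self, objects=None):
        self.objects = objects or []

    @classmethod
    def from_tuples(cls, tuples):
        return cls(objects=[_NetworkRequest.from_tuple(t) for t in tuples])

_tuples = [('net-1', 'v4-fixed-ip', None, None)]
assert _NetworkRequestList.from_tuples(_tuples).objects[0].data == _tuples[0]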
Beispiel #50
    def _execute(self):
        # NOTE(sbauza): Force_hosts/nodes needs to be reset if we want to make
        # sure that the next destination is not forced to be the original host.
        # This needs to be done before the populate_retry call otherwise
        # retries will be disabled if the server was created with a forced
        # host/node.
        self.request_spec.reset_forced_destinations()

        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec and
                self.request_spec.requested_destination and
                'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props,
                                           self.instance.uuid)

        port_res_req = self.network_api.get_requested_resource_for_instance(
            self.context, self.instance.uuid)
        # NOTE(gibi): When cyborg or other module wants to handle similar
        # non-nova resources then here we have to collect all the external
        # resource requests in a single list and add them to the RequestSpec.
        self.request_spec.requested_resources = port_res_req

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(
            self.context, self.request_spec, self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection = self._schedule()

        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations.
            selection = self._reschedule()

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        # The availability_zone field was added in v1.1 of the Selection
        # object so make sure to handle the case where it is missing.
        if 'availability_zone' in selection:
            self.instance.availability_zone = selection.availability_zone
        else:
            self.instance.availability_zone = (
                availability_zones.get_host_availability_zone(
                    self.context, host))

        LOG.debug("Calling prep_resize with selected host: %s; "
                  "Selected node: %s; Alternates: %s", host, node,
                  self.host_list, instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context, self.instance, self.request_spec.image,
            self.flavor, host, migration,
            request_spec=self.request_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
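
# A toy sketch of the version-tolerant field access above: Selection grew the
# availability_zone field in v1.1, and versioned objects support membership
# tests ('field' in obj), so older payloads fall back to a per-host lookup.
# This stand-in class only mimics that membership check.
class _Selection(object):
    def __init__(self, **fields):
        self._fields = fields

    def __contains__(self, name):
        return name in self._fields

    def __getattr__(self, name):
        try:
            return self._fields[name]
        except KeyError:
            raise AttributeError(name)

def pick_az(selection, fallback_az):
    if 'availability_zone' in selection:
        return selection.availability_zone
    return fallback_az

assert pick_az(_Selection(availability_zone='az1'), 'default') == 'az1'
assert pick_az(_Selection(), 'default') == 'default'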
Beispiel #51
    def _execute(self):
        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # TODO(gibi): We need to make sure that the requested_resources field
        # is recalculated based on neutron ports.

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.query_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context,
            self.instance,
            self.request_spec.image,
            self.flavor,
            host,
            migration,
            request_spec=self.request_spec,
            filter_properties=legacy_props,
            node=node,
            clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
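
# The TODO above about recalculating requested_resources is handled in the
# Beispiel #50 variant of _execute(), which refreshes the field from neutron
# before scheduling, roughly:
#
#     port_res_req = self.network_api.get_requested_resource_for_instance(
#         self.context, self.instance.uuid)
#     self.request_spec.requested_resources = port_res_req
#
# so QoS port resource requests are collected again for the new destination
# instead of reusing whatever was recorded when the server was created.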
Beispiel #52
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
            snapshot_id = sys_meta.get('shelved_image_id')
            if snapshot_id:
                self._delete_image(context, snapshot_id)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            try:
                with compute_utils.EventReporter(context, self.db,
                                                 'get_image_info',
                                                 instance.uuid):
                    image = self._get_image(context,
                                            sys_meta['shelved_image_id'])
            except exception.ImageNotFound:
                with excutils.save_and_reraise_exception():
                    LOG.error(_('Unshelve attempted but the image %s '
                                'cannot be found.'),
                              sys_meta.get('shelved_image_id'),
                              instance=instance)
                    instance.vm_state = vm_states.ERROR
                    instance.save()

            try:
                with compute_utils.EventReporter(context, self.db,
                                                 'schedule_instances',
                                                 instance.uuid):
                    filter_properties = {}
                    hosts = self._schedule_instances(context, image,
                                                     filter_properties,
                                                     instance)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                        filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                        context,
                        instance,
                        host,
                        image=image,
                        filter_properties=filter_properties,
                        node=node)
            except exception.NoValidHost:
                instance.task_state = None
                instance.save()
                LOG.warning(_("No valid host found for unshelve instance"),
                            instance=instance)
                return
        else:
            LOG.error(_('Unshelve attempted but vm_state not SHELVED or '
                        'SHELVED_OFFLOADED'),
                      instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return

        for key in ['shelved_at', 'shelved_image_id', 'shelved_host']:
            if key in sys_meta:
                del sys_meta[key]
        instance.system_metadata = sys_meta
        instance.save()
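
# A slightly more compact spelling of the cleanup loop above, assuming the
# same intent: drop the shelve bookkeeping keys whether or not they are set.
def clear_shelve_metadata(sys_meta):
    for key in ('shelved_at', 'shelved_image_id', 'shelved_host'):
        sys_meta.pop(key, None)
    return sys_meta

assert clear_shelve_metadata({'shelved_at': 'x', 'other': 1}) == {'other': 1}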
Beispiel #53
    def build_instances(self,
                        context,
                        instances,
                        image,
                        filter_properties,
                        admin_password,
                        injected_files,
                        requested_networks,
                        security_groups,
                        block_device_mapping=None,
                        legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.
        request_spec = scheduler_utils.build_request_spec(
            context, image, instances)
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and not isinstance(requested_networks,
                                                  objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(objects=[
                objects.NetworkRequest.from_tuple(t)
                for t in requested_networks
            ])
        # TODO(melwitt): Remove this in version 2.0 of the RPC API
        flavor = filter_properties.get('instance_type')
        if flavor and not isinstance(flavor, objects.Flavor):
            # Code downstream may expect extra_specs to be populated since it
            # is receiving an object, so lookup the flavor to ensure this.
            flavor = objects.Flavor.get_by_id(context, flavor['id'])
            filter_properties = dict(filter_properties, instance_type=flavor)

        try:
            scheduler_utils.setup_instance_group(context, request_spec,
                                                 filter_properties)
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,
                                           instances[0].uuid)
            hosts = self.scheduler_client.select_destinations(
                context, request_spec, filter_properties)
        except Exception as exc:
            updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            for instance in instances:
                self._set_vm_state_and_notify(context, instance.uuid,
                                              'build_instances', updates, exc,
                                              request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)
            scheduler_utils.populate_filter_properties(local_filter_props,
                                                       host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)

            self.compute_rpcapi.build_and_run_instance(
                context,
                instance=instance,
                host=host['host'],
                image=image,
                request_spec=request_spec,
                filter_properties=local_filter_props,
                admin_password=admin_password,
                injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=security_groups,
                block_device_mapping=bdms,
                node=host['nodename'],
                limits=host['limits'])
Beispiel #54
    def _execute(self):
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec and
                self.request_spec.requested_destination and
                'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props,
                                           self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec and
                self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): When the target host is specified and the
            # migration fails, there is no point retrying the cold migration
            # on the same host, so make sure a reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.scheduler_client.select_destinations(
                    self.context, self.request_spec, [self.instance.uuid],
                    return_objects=True, return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                            elevated, self.reportclient, self.request_spec,
                            self.instance.uuid, alloc_req,
                            selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." %
                          {"instance_uuid": self.instance.uuid})
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(
                self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        LOG.debug("Calling prep_resize with selected host: %s; "
                  "Selected node: %s; Alternates: %s", host, node,
                  self.host_list, instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            self.context, self.instance, legacy_spec['image'],
            self.flavor, host, migration,
            request_spec=legacy_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
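
For reference, a minimal sketch of the nested structure that
select_destinations() returns when called with return_objects=True and
return_alternates=True, as consumed at the top of this example: one inner
list per requested instance, whose first element is the chosen Selection and
whose remainder are the alternates passed on to the compute (the string
elements below are hypothetical stand-ins for Selection objects):

# illustrative shape only; real elements are Selection objects
selection_lists = [['chosen', 'alt-1', 'alt-2']]  # one inner list per instance
selection_list = selection_lists[0]
selection, host_list = selection_list[0], selection_list[1:]
# selection == 'chosen'; host_list == ['alt-1', 'alt-2']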
Example #55
    def reschedule(self, context, instance):
        """Rescheduler the given instance."""

        # If the reschedule involves a rebuild, the instance may sit in a
        # REBUILD* task state for a long time, or simply get stuck there. A
        # second reschedule request can be triggered while the VM is in ERROR
        # state or its host is faulty, so ignore such duplicated requests.
        if instance['task_state'] in (task_states.REBUILDING,
                                      task_states.REBUILD_BLOCK_DEVICE_MAPPING,
                                      task_states.REBUILD_SPAWNING,
                                      task_states.DELETING):
            LOG.warning(_('instance task_state is %s, ignoring this '
                          'request'),
                        instance['task_state'],
                        instance=instance)
            return

        LOG.info('reschedule instance', instance=instance)

        orig_image_ref = instance['image_ref'] or ''

        current_instance_type = flavors.extract_flavor(instance)

        # Ignore current host
        filter_properties = {'ignore_hosts': []}
        if not CONF.allow_reschedule_to_same_host:
            filter_properties['ignore_hosts'].append(instance['host'])

        image_ref = instance.image_ref
        image = compute_utils.get_image_metadata(context, self.image_api,
                                                 image_ref, instance)

        request_spec = scheduler_utils.build_request_spec(
            context, image, [instance], instance_type=current_instance_type)

        # Get scheduler_hint info
        inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
            context, instance.uuid)
        injected_files = self.db.injected_files_get_by_instance_uuid(
            context, instance.uuid)
        request_networks = []
        if inst_extra:
            scheduler_hints = jsonutils.loads(inst_extra.scheduler_hints
                                              or '{}')
            request_networks = jsonutils.loads(inst_extra.request_network
                                               or '[]')
        else:
            scheduler_hints = {}
        pci_requests = (
            objects.InstancePCIRequests.get_by_instance_uuid_and_newness(
                context, instance['uuid'], False))
        if pci_requests:
            filter_properties['pci_requests'] = pci_requests
        filter_properties['scheduler_hints'] = scheduler_hints

        LOG.info("reschedule filter_properties %s",
                 filter_properties,
                 instance=instance)

        self._record_action_start(context, instance, hw_actions.RESCHEDULE)
        try:
            hosts = self._select_destinations(context, instance, request_spec,
                                              filter_properties)
            host_state = hosts[0]['host']
            LOG.info("HA selected host %s", host_state, instance=instance)
        except exception.NoValidHost:
            LOG.warning(_("No valid host found"), instance=instance)

            if instance['host']:
                self._try_local_reboot(context, instance, 'HARD')

            return

        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)
        LOG.info("instance bdms %s",
                 jsonutils.to_primitive(bdms),
                 instance=instance)

        scheduler_utils.populate_filter_properties(filter_properties, hosts[0])

        def _get_network_info(nw_info):
            pci_req_id = None
            if len(nw_info) > 3:
                pci_req_id = nw_info[3]
            return (nw_info[0], nw_info[1], nw_info[2], pci_req_id)

        request_networks = [_get_network_info(n) for n in request_networks]
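        # e.g. a stored 3-tuple ('net-uuid', '10.0.0.5', 'port-uuid') becomes
        # ('net-uuid', '10.0.0.5', 'port-uuid', None), while 4-tuples that
        # already carry a pci_req_id pass through unchanged (values shown are
        # illustrative only)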

        alive_dict = {'alive': False, 'fault': False, 'count': 0}

        def async_check_live():
            try:
                self.compute_rpcapi.check_alive(context, host_state,
                                                'nova-api')
                alive_dict['alive'] = True
            except Exception as e:
                LOG.error(_LE('check alive failed, host %s: %s'),
                          host_state, e)
                alive_dict['fault'] = True

        def _loop_check():
            if alive_dict['fault']:
                raise loopingcall.LoopingCallDone()

            if alive_dict['alive']:
                LOG.debug('compute service alive, host %s',
                          host_state,
                          instance=instance)
                raise loopingcall.LoopingCallDone()

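            # Give up after 120 polls; at the one-second interval used by
            # the callers below, this is roughly a two-minute timeout.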
            if alive_dict['count'] == 120:
                LOG.debug('check alive timeout, host %s',
                          host_state,
                          instance=instance)
                raise loopingcall.LoopingCallDone()

            alive_dict['count'] += 1

        # Clear the instance's resources on the source host
        if instance['host'] and self.judge_branch(context, instance,
                                                  request_networks):
            instance.task_state = task_states.REBUILDING
            instance.save()

            # In some extreme cases RPC messages can pile up on the HA dest
            # host. To avoid that, build_and_run_instance was changed from an
            # async 'cast' to a sync 'call'; since the outer request must not
            # block, the synchronous RPC is driven from a spawned green
            # thread.
            def _async_reschedule():
                # check dest compute service is alive
                utils.spawn_n(async_check_live)
                timer = loopingcall.FixedIntervalLoopingCall(_loop_check)
                timer.start(interval=1).wait()

                if not alive_dict['alive']:
                    LOG.warning(
                        '%s compute service seems down, reverting instance '
                        'task state',
                        host_state,
                        instance=instance)
                    instance.task_state = None
                    instance.save()
                    return

                LOG.info('reschedule instance to host %s',
                         host_state,
                         instance=instance)
                try:
                    self.compute_rpcapi.sync_reschedule_instance(
                        context,
                        instance=instance,
                        new_pass=None,
                        injected_files=jsonutils.loads(injected_files),
                        image_ref=image_ref,
                        orig_image_ref=orig_image_ref,
                        orig_sys_metadata=None,
                        bdms=bdms,
                        host=host_state,
                        filter_properties=filter_properties)
                except Exception as e:
                    LOG.error(_LE('reschedule call failed: %s'), e)
                    self.db.instance_update(context,
                                            instance.uuid,
                                            task_state=None)

            utils.spawn_n(_async_reschedule)
        else:
            security_groups = self.db.security_group_get_by_instance(
                context, instance.uuid)
            block_device_mapping = \
                self.db.block_device_mapping_get_all_by_instance(
                    context, instance.uuid)
            request_spec.update({
                'block_device_mapping': block_device_mapping,
                'security_group': security_groups
            })

            # TODO(): Remove this in version 2.0 of the RPC API
            if (request_networks and not isinstance(
                    request_networks, objects.NetworkRequestList)):
                request_networks = objects.NetworkRequestList(objects=[
                    objects.NetworkRequest.from_tuple(t)
                    for t in request_networks
                ])

            # In some extreme cases RPC messages can pile up on the HA dest
            # host. To avoid that, build_and_run_instance was changed from an
            # async 'cast' to a sync 'call'; since the outer request must not
            # block, the synchronous RPC is driven from a spawned green
            # thread.
            def _async_build_and_run_instance():
                # check dest compute service is alive
                utils.spawn_n(async_check_live)
                timer = loopingcall.FixedIntervalLoopingCall(_loop_check)
                timer.start(interval=1).wait()

                if not alive_dict['alive']:
                    LOG.warning(
                        '%s compute service seems down, reverting instance '
                        'task state',
                        host_state,
                        instance=instance)
                    instance.task_state = None
                    instance.save()
                    return

                LOG.info('build instance on host %s',
                         host_state,
                         instance=instance)
                self.compute_rpcapi.sync_build_and_run_instance(
                    context,
                    instance=instance,
                    host=host_state,
                    image=image,
                    request_spec=request_spec,
                    filter_properties=filter_properties,
                    admin_password=None,
                    injected_files=jsonutils.loads(injected_files),
                    requested_networks=request_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms,
                    node=host_state,
                    limits=hosts[0]['limits'])

            utils.spawn_n(_async_build_and_run_instance)
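
Both branches above share the same spawn-then-poll pattern: start the
liveness probe in a green thread, poll a shared flag with a
FixedIntervalLoopingCall, then make the synchronous RPC. A stripped-down,
self-contained sketch of that pattern, assuming eventlet and oslo.service
are available (probe() is a hypothetical stand-in for the check_alive RPC):

import eventlet
from oslo_service import loopingcall

state = {'alive': False, 'count': 0}

def probe():
    # hypothetical liveness check; flips the shared flag on success
    state['alive'] = True

def poll():
    # stop once the probe has succeeded, or after ~120 seconds
    if state['alive'] or state['count'] >= 120:
        raise loopingcall.LoopingCallDone()
    state['count'] += 1

eventlet.spawn_n(probe)
loopingcall.FixedIntervalLoopingCall(poll).start(interval=1).wait()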
Example #56
    def _execute(self):
        # TODO(sbauza): Remove this once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)

        # WRS: add hosts to the server group host list for group members
        # whose migrations are still in progress
        if 'group_members' in legacy_props:
            metadetails = self.request_spec.instance_group['metadetails']
            is_best_effort = strutils.bool_from_string(
                metadetails.get('wrs-sg:best_effort', 'False'))
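            # strutils.bool_from_string treats '1', 't', 'true', 'on', 'y'
            # and 'yes' (case-insensitively) as True; anything else falls
            # back to the default, which is False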

            if ('anti-affinity' in self.request_spec.instance_group['policies']
                    and not is_best_effort):
                group_members = self.request_spec.instance_group['members']

                for instance_uuid in group_members:
                    filters = {
                        'migration_type': 'migration',
                        'instance_uuid': instance_uuid,
                        'status': ['queued', 'pre-migrating', 'migrating',
                                   'post-migrating', 'finished'],
                    }

                    migrations = objects.MigrationList.get_by_filters(
                        self.context, filters)

                    for migration in migrations:
                        if migration['source_compute'] not in \
                                self.request_spec.instance_group['hosts']:
                            self.request_spec.instance_group['hosts'].append(
                                migration['source_compute'])
                        if (migration['dest_compute'] and
                            (migration['dest_compute'] not in
                             self.request_spec.instance_group['hosts'])):
                            self.request_spec.instance_group['hosts'].append(
                                migration['dest_compute'])

                # refresh legacy_spec and legacy_props with latest request_spec
                legacy_spec = self.request_spec.to_legacy_request_spec_dict()
                legacy_props = (
                    self.request_spec.to_legacy_filter_properties_dict())

        scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        self.request_spec.ensure_project_id(self.instance)

        # WRS: determine the CPUs offlined by scaling; the scheduler uses
        # this when calculating the placement service resource claim
        self.request_spec.offline_cpus = (
            scheduler_utils.determine_offline_cpus(
                self.flavor, self.instance.numa_topology))
        # NOTE(danms): We don't pass enough information to the scheduler to
        # know that we have a boot-from-volume request.
        # TODO(danms): We need to pass more context to the scheduler here
        # in order to (a) handle boot-from-volume instances, as well as
        # (b) know which volume provider to request resource from.
        request_spec_copy = self.request_spec
        if self.instance.is_volume_backed():
            LOG.debug('Requesting zero root disk for '
                      'boot-from-volume instance')
            # Clone this so we don't mutate the RequestSpec that was passed in
            request_spec_copy = self.request_spec.obj_clone()
            request_spec_copy.flavor.root_gb = 0

        hosts = self.scheduler_client.select_destinations(
            self.context, request_spec_copy, [self.instance.uuid])
        host_state = hosts[0]

        scheduler_utils.populate_filter_properties(legacy_props, host_state)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (host_state['host'], host_state['nodename'])

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        self.reservations,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown)

        # WRS: return the request_spec so it can be saved to the DB, but
        # clear retry and instance_group hosts so that the next request
        # starts cleanly
        self.request_spec.retry = None
        if self.request_spec.instance_group:
            self.request_spec.instance_group.hosts = []
        return self.request_spec
Example #57
    def unshelve_instance(self, context, instance):
        sys_meta = instance.system_metadata

        def safe_image_show(ctx, image_id):
            if image_id:
                return self.image_api.get(ctx, image_id, show_deleted=False)
            else:
                raise exception.ImageNotFound(image_id='')

        if instance.vm_state == vm_states.SHELVED:
            instance.task_state = task_states.POWERING_ON
            instance.save(expected_task_state=task_states.UNSHELVING)
            self.compute_rpcapi.start_instance(context, instance)
            snapshot_id = sys_meta.get('shelved_image_id')
            if snapshot_id:
                self._delete_image(context, snapshot_id)
        elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
            image_id = sys_meta.get('shelved_image_id')
            with compute_utils.EventReporter(context, 'get_image_info',
                                             instance.uuid):
                try:
                    image = safe_image_show(context, image_id)
                except exception.ImageNotFound:
                    instance.vm_state = vm_states.ERROR
                    instance.save()

                    if image_id:
                        reason = _('Unshelve attempted but the image %s '
                                   'cannot be found.') % image_id
                    else:
                        reason = _('Unshelve attempted but the image_id is '
                                   'not provided')

                    LOG.error(reason, instance=instance)
                    raise exception.UnshelveException(
                        instance_id=instance.uuid, reason=reason)

            try:
                with compute_utils.EventReporter(context, 'schedule_instances',
                                                 instance.uuid):
                    filter_properties = {}
                    hosts = self._schedule_instances(context, image,
                                                     filter_properties,
                                                     instance)
                    host_state = hosts[0]
                    scheduler_utils.populate_filter_properties(
                        filter_properties, host_state)
                    (host, node) = (host_state['host'], host_state['nodename'])
                    self.compute_rpcapi.unshelve_instance(
                        context,
                        instance,
                        host,
                        image=image,
                        filter_properties=filter_properties,
                        node=node)
            except exception.NoValidHost:
                instance.task_state = None
                instance.save()
                LOG.warning(_LW("No valid host found for unshelve instance"),
                            instance=instance)
                return
        else:
            LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or '
                          'SHELVED_OFFLOADED'),
                      instance=instance)
            instance.vm_state = vm_states.ERROR
            instance.save()
            return

        for key in ['shelved_at', 'shelved_image_id', 'shelved_host']:
            if key in sys_meta:
                del sys_meta[key]
        instance.system_metadata = sys_meta
        instance.save()
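
For context, the three system_metadata keys cleared at the end of
unshelve_instance are the ones recorded when the instance was shelved. An
illustrative (hypothetical) example of their contents:

sys_meta = {
    'shelved_at': '2017-01-01T00:00:00.000000',  # when the shelve happened
    'shelved_image_id': '9a8b7c6d-1234-5678-9abc-def012345678',  # snapshot
    'shelved_host': 'compute-1',                 # host the instance left
}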
Example #58
    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs, image,
                                     admin_password, injected_files,
                                     requested_networks, block_device_mapping):
        legacy_spec = request_specs[0].to_legacy_request_spec_dict()
        try:
            hosts = self._schedule_instances(context, legacy_spec,
                        request_specs[0].to_legacy_filter_properties_dict())
        except Exception as exc:
            LOG.exception(_LE('Failed to schedule instances'))
            self._bury_in_cell0(context, request_specs[0], exc,
                                build_requests=build_requests)
            return

        host_mapping_cache = {}

        for (build_request, request_spec, host) in six.moves.zip(
                build_requests, request_specs, hosts):
            filter_props = request_spec.to_legacy_filter_properties_dict()
            scheduler_utils.populate_filter_properties(filter_props,
                                                       host)
            instance = build_request.get_new_instance(context)

            # Convert host from the scheduler into a cell record
            if host['host'] not in host_mapping_cache:
                try:
                    host_mapping = objects.HostMapping.get_by_host(
                        context, host['host'])
                    host_mapping_cache[host['host']] = host_mapping
                except exception.HostMappingNotFound as exc:
                    LOG.error(_LE('No host-to-cell mapping found for selected '
                                  'host %(host)s. Setup is incomplete.'),
                              {'host': host['host']})
                    self._bury_in_cell0(context, request_spec, exc,
                                        build_requests=[build_request],
                                        instances=[instance])
                    continue
            else:
                host_mapping = host_mapping_cache[host['host']]

            cell = host_mapping.cell_mapping

            with obj_target_cell(instance, cell):
                instance.create()

            # send a state update notification for the initial create to
            # show it going from non-existent to BUILDING
            notifications.send_update_with_states(context, instance, None,
                    vm_states.BUILDING, None, None, service="conductor")

            objects.InstanceAction.action_start(
                context, instance.uuid, instance_actions.CREATE,
                want_result=False)

            with obj_target_cell(instance, cell):
                instance_bdms = self._create_block_device_mapping(
                    instance.flavor, instance.uuid, block_device_mapping)

            # Update mapping for instance. Normally this check is guarded by
            # a try/except but if we're here we know that a newer nova-api
            # handled the build process and would have created the mapping
            inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
                context, instance.uuid)
            inst_mapping.cell_mapping = cell
            inst_mapping.save()

            try:
                build_request.destroy()
            except exception.BuildRequestNotFound:
                # This indicates an instance deletion request has been
                # processed, and the build should halt here. Clean up the
                # bdm and instance record.
                with obj_target_cell(instance, cell):
                    try:
                        instance.destroy()
                    except exception.InstanceNotFound:
                        pass
                for bdm in instance_bdms:
                    with obj_target_cell(bdm, cell):
                        try:
                            bdm.destroy()
                        except exception.ObjectActionError:
                            pass
                return

            # NOTE(danms): Compute RPC expects security group names or ids
            # not objects, so convert this to a list of names until we can
            # pass the objects.
            legacy_secgroups = [s.identifier
                                for s in request_spec.security_groups]

            with obj_target_cell(instance, cell):
                self.compute_rpcapi.build_and_run_instance(
                    context, instance=instance, image=image,
                    request_spec=request_spec,
                    filter_properties=filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=legacy_secgroups,
                    block_device_mapping=instance_bdms,
                    host=host['host'], node=host['nodename'],
                    limits=host['limits'])
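
The scheduler results consumed throughout this example are legacy host
dicts; the keys used above are 'host', 'nodename' and 'limits'. An
illustrative (hypothetical) example of one such dict:

host = {
    'host': 'compute-1',                  # service host name
    'nodename': 'compute-1.example.com',  # hypervisor node
    'limits': {'memory_mb': 8192, 'vcpus': 4},  # oversubscription limits
}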