Example #1
    def test_claim_resources(self, mock_is_rebuild, mock_client):
        """Tests that when claim_resources() is called, that we appropriately
        call the placement client to claim resources for the instance.
        """
        mock_is_rebuild.return_value = False
        ctx = nova_context.RequestContext(user_id=uuids.user_id)
        spec_obj = objects.RequestSpec(project_id=uuids.project_id)
        instance_uuid = uuids.instance
        alloc_req = mock.sentinel.alloc_req
        mock_client.claim_resources.return_value = True

        res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid,
                alloc_req)

        mock_client.claim_resources.assert_called_once_with(
            ctx, uuids.instance, mock.sentinel.alloc_req, uuids.project_id,
            uuids.user_id, allocation_request_version=None,
            consumer_generation=None)
        self.assertTrue(res)

        # Now do it again but with RequestSpec.user_id set.
        spec_obj.user_id = uuids.spec_user_id
        mock_client.reset_mock()
        utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid,
                              alloc_req)
        mock_client.claim_resources.assert_called_once_with(
            ctx, uuids.instance, mock.sentinel.alloc_req, uuids.project_id,
            uuids.spec_user_id, allocation_request_version=None,
            consumer_generation=None)
Example #3
 def test_claim_resources_for_policy_check(self, mock_is_rebuild,
                                           mock_client):
     mock_is_rebuild.return_value = True
     ctx = mock.Mock(user_id=uuids.user_id)
     res = utils.claim_resources(ctx, None, mock.sentinel.spec_obj,
                                 mock.sentinel.instance_uuid, [])
     self.assertTrue(res)
     mock_is_rebuild.assert_called_once_with(mock.sentinel.spec_obj)
     self.assertFalse(mock_client.claim_resources.called)
Example #4
 def test_claim_resources_for_policy_check(self, mock_is_rebuild,
         mock_client):
     mock_is_rebuild.return_value = True
     ctx = mock.Mock(user_id=uuids.user_id)
     res = utils.claim_resources(ctx, None, mock.sentinel.spec_obj,
             mock.sentinel.instance_uuid, [])
     self.assertTrue(res)
     mock_is_rebuild.assert_called_once_with(mock.sentinel.spec_obj)
     self.assertFalse(mock_client.claim_resources.called)
Example #5
 def _reschedule(self):
     # Since the resources on these alternates may have been consumed and
     # might not be able to support the migrated instance, we need to first
     # claim the resources to verify the host still has sufficient
     # available resources.
     elevated = self.context.elevated()
     host_available = False
     selection = None
     while self.host_list and not host_available:
         selection = self.host_list.pop(0)
         if (self.request_spec.requested_resources
                 and not self._support_resource_request(selection)):
             LOG.debug(
                 'Scheduler returned alternate host %(host)s as a possible '
                 'migration target for re-schedule but that host is not '
                 'new enough to support the migration with resource '
                 'request %(request)s. Trying another alternate.', {
                     'host': selection.service_host,
                     'request': self.request_spec.requested_resources
                 },
                 instance=self.instance)
             continue
         if selection.allocation_request:
             alloc_req = jsonutils.loads(selection.allocation_request)
         else:
             alloc_req = None
         if alloc_req:
             # If this call succeeds, the resources on the destination
             # host will be claimed by the instance.
             host_available = scheduler_utils.claim_resources(
                 elevated, self.reportclient, self.request_spec,
                 self.instance.uuid, alloc_req,
                 selection.allocation_request_version)
             if host_available:
                 scheduler_utils.fill_provider_mapping(
                     self.context, self.reportclient, self.request_spec,
                     selection)
         else:
             # Some deployments use different schedulers that do not
             # use Placement, so they will not have an
             # allocation_request to claim with. For those cases,
             # there is no concept of claiming, so just assume that
             # the host is valid.
             host_available = True
     # There are no more available hosts. Raise a MaxRetriesExceeded
     # exception in that case.
     if not host_available:
         reason = ("Exhausted all hosts available for retrying build "
                   "failures for instance %(instance_uuid)s." % {
                       "instance_uuid": self.instance.uuid
                   })
         raise exception.MaxRetriesExceeded(reason=reason)
     return selection
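The while loop above is an instance of a more general pattern: pop candidates off the alternate list until a claim succeeds or the list is exhausted. The following standalone sketch (illustrative names only; `claim` stands in for scheduler_utils.claim_resources bound to the right context, report client and request spec) distills that pattern, deliberately omitting the resource-request version check and the provider-mapping fill that _reschedule() also performs.

import json


def pick_first_claimable(host_list, claim):
    """Pop Selection-like objects from host_list until one can be claimed.

    Returns the claimed selection, or None when the list is exhausted.
    """
    while host_list:
        selection = host_list.pop(0)
        if not selection.allocation_request:
            # No Placement allocation request to claim against (for example,
            # a deployment using a non-Placement scheduler): accept the host.
            return selection
        alloc_req = json.loads(selection.allocation_request)
        if claim(alloc_req, selection.allocation_request_version):
            return selection
    return None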
Example #6
    def test_claim_resources(self, mock_is_rebuild, mock_client):
        """Tests that when claim_resources() is called, that we appropriately
        call the placement client to claim resources for the instance.
        """
        mock_is_rebuild.return_value = False
        ctx = mock.Mock(user_id=uuids.user_id)
        spec_obj = mock.Mock(project_id=uuids.project_id)
        instance_uuid = uuids.instance
        alloc_req = mock.sentinel.alloc_req
        mock_client.claim_resources.return_value = True

        res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid,
                alloc_req)

        mock_client.claim_resources.assert_called_once_with(uuids.instance,
                mock.sentinel.alloc_req, uuids.project_id, uuids.user_id,
                allocation_request_version=None)
        self.assertTrue(res)
Example #7
    def test_claim_resources(self, mock_is_rebuild, mock_client):
        """Tests that when claim_resources() is called, that we appropriately
        call the placement client to claim resources for the instance.
        """
        mock_is_rebuild.return_value = False
        ctx = mock.Mock(user_id=uuids.user_id)
        spec_obj = mock.Mock(project_id=uuids.project_id)
        instance_uuid = uuids.instance
        alloc_req = mock.sentinel.alloc_req
        mock_client.claim_resources.return_value = True

        res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid,
                alloc_req)

        mock_client.claim_resources.assert_called_once_with(
            ctx, uuids.instance, mock.sentinel.alloc_req, uuids.project_id,
            uuids.user_id, allocation_request_version=None)
        self.assertTrue(res)
Example #8
    def _schedule(self,
                  context,
                  spec_obj,
                  instance_uuids,
                  alloc_reqs_by_rp_uuid,
                  provider_summaries,
                  allocation_request_version=None,
                  return_alternates=False):
        """Returns a list of lists of Selection objects.

        :param context: The RequestContext object
        :param spec_obj: The RequestSpec object
        :param instance_uuids: List of instance UUIDs to place or move.
        :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider
                                      UUID, of the allocation_requests that may
                                      be used to claim resources against
                                      matched hosts. If None, indicates either
                                      the placement API wasn't reachable or
                                      that there were no allocation_requests
                                      returned by the placement API. If the
                                      latter, the provider_summaries will be an
                                      empty dict, not None.
        :param provider_summaries: Optional dict, keyed by resource provider
                                   UUID, of information that will be used by
                                   the filters/weighers in selecting matching
                                   hosts for a request. If None, indicates that
                                   the scheduler driver should grab all compute
                                   node information locally and that the
                                   Placement API is not used. If an empty dict,
                                   indicates the Placement API returned no
                                   potential matches for the requested
                                   resources.
        :param allocation_request_version: The microversion used to request the
                                           allocations.
        :param return_alternates: When True, zero or more alternate hosts are
                                  returned with each selected host. The number
                                  of alternates is determined by the
                                  configuration option
                                  `CONF.scheduler.max_attempts`.
        """
        elevated = context.elevated()

        # Find our local list of acceptable hosts by repeatedly
        # filtering and weighing our options. Each time we choose a
        # host, we virtually consume resources on it so subsequent
        # selections can adjust accordingly.

        # Note: remember, we are using a generator-iterator here. So only
        # traverse this list once. This can bite you if the hosts
        # are being scanned in a filter or weighing function.
        hosts = self._get_all_host_states(elevated, spec_obj,
                                          provider_summaries)

        # NOTE(sbauza): The RequestSpec.num_instances field contains the number
        # of instances created when the RequestSpec was used to first boot some
        # instances. This is incorrect when doing a move or resize operation,
        # so prefer the length of instance_uuids unless it is None.
        num_instances = (len(instance_uuids)
                         if instance_uuids else spec_obj.num_instances)

        # For each requested instance, we want to return a host whose resources
        # for the instance have been claimed, along with zero or more
        # alternates. These alternates will be passed to the cell that the
        # selected host is in, so that if for some reason the build fails, the
        # cell conductor can retry building the instance on one of these
        # alternates instead of having to simply fail. The number of alternates
        # is based on CONF.scheduler.max_attempts; note that if there are not
        # enough filtered hosts to provide the full number of alternates, the
        # list of hosts may be shorter than this amount.
        num_alts = (CONF.scheduler.max_attempts -
                    1 if return_alternates else 0)

        if (instance_uuids is None or not self.USES_ALLOCATION_CANDIDATES
                or alloc_reqs_by_rp_uuid is None):
            # We need to support the caching scheduler, which doesn't use the
            # placement API (and has USES_ALLOCATION_CANDIDATES = False) and
            # therefore we skip all the claiming logic for that scheduler
            # driver. Also, if there was a problem communicating with the
            # placement API, alloc_reqs_by_rp_uuid will be None, so we skip
            # claiming in that case as well. In the case where instance_uuids
            # is None, that indicates an older conductor, so we need to return
            # the objects without alternates. They will be converted back to
            # the older dict format representing HostState objects.
            return self._legacy_find_hosts(context,
                                           num_instances,
                                           spec_obj,
                                           hosts,
                                           num_alts,
                                           instance_uuids=instance_uuids)

        # A list of the instance UUIDs that were successfully claimed against
        # in the placement API. If we are not able to successfully claim for
        # all involved instances, we use this list to remove those allocations
        # before returning
        claimed_instance_uuids = []

        # The list of hosts that have been selected (and claimed).
        claimed_hosts = []

        for num, instance_uuid in enumerate(instance_uuids):
            # In a multi-create request, the first request spec from the list
            # is passed to the scheduler and that request spec's instance_uuid
            # might not be the same as the instance we're processing, so we
            # update the instance_uuid in that case before passing the request
            # spec to filters since at least one filter
            # (ServerGroupAntiAffinityFilter) depends on that information being
            # accurate.
            spec_obj.instance_uuid = instance_uuid
            # Reset the field so it's not persisted accidentally.
            spec_obj.obj_reset_changes(['instance_uuid'])

            hosts = self._get_sorted_hosts(spec_obj, hosts, num)
            if not hosts:
                # NOTE(jaypipes): If we get here, that means not all instances
                # in instance_uuids were able to be matched to a selected host.
                # Any allocations will be cleaned up in the
                # _ensure_sufficient_hosts() call.
                break

            # Attempt to claim the resources against one or more resource
            # providers, looping over the sorted list of possible hosts
            # looking for an allocation_request that contains that host's
            # resource provider UUID
            claimed_host = None
            for host in hosts:
                cn_uuid = host.uuid
                if cn_uuid not in alloc_reqs_by_rp_uuid:
                    msg = ("A host state with uuid = '%s' that did not have a "
                           "matching allocation_request was encountered while "
                           "scheduling. This host was skipped.")
                    LOG.debug(msg, cn_uuid)
                    continue

                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                # TODO(jaypipes): Loop through all allocation_requests instead
                # of just trying the first one. For now, since we'll likely
                # want to order the allocation_requests in the future based on
                # information in the provider summaries, we'll just try to
                # claim resources using the first allocation_request
                alloc_req = alloc_reqs[0]
                if utils.claim_resources(
                        elevated,
                        self.placement_client,
                        spec_obj,
                        instance_uuid,
                        alloc_req,
                        allocation_request_version=allocation_request_version):
                    claimed_host = host
                    break

            if claimed_host is None:
                # We weren't able to claim resources in the placement API
                # for any of the sorted hosts identified. So, clean up any
                # successfully-claimed resources for prior instances in
                # this request and return an empty list which will cause
                # select_destinations() to raise NoValidHost
                LOG.debug("Unable to successfully claim against any host.")
                break

            claimed_instance_uuids.append(instance_uuid)
            claimed_hosts.append(claimed_host)

            # Now consume the resources so the filter/weights will change for
            # the next instance.
            self._consume_selected_host(claimed_host,
                                        spec_obj,
                                        instance_uuid=instance_uuid)

        # Check if we were able to fulfill the request. If not, this call will
        # raise a NoValidHost exception.
        self._ensure_sufficient_hosts(context, claimed_hosts, num_instances,
                                      claimed_instance_uuids)

        # We have selected and claimed hosts for each instance. Now we need to
        # find alternates for each host.
        selections_to_return = self._get_alternate_hosts(
            claimed_hosts, spec_obj, hosts, num, num_alts,
            alloc_reqs_by_rp_uuid, allocation_request_version)
        return selections_to_return
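To make the claiming loop above easier to follow, here is an illustrative example of the alloc_reqs_by_rp_uuid structure it indexes with host.uuid. The UUID and resource amounts are made up, and the exact allocation-request payload depends on the Placement microversion in use.

# Illustrative only: one entry per resource provider (compute node) UUID,
# each mapping to the list of allocation_requests that include that provider.
alloc_reqs_by_rp_uuid = {
    'c1d6b36c-0000-4b6c-8f3c-2d1e2f3a4b5c': [
        {
            'allocations': {
                'c1d6b36c-0000-4b6c-8f3c-2d1e2f3a4b5c': {
                    'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
                },
            },
        },
        # ...possibly more allocation_requests for the same provider
    ],
}

# The loop above only tries the first candidate for each host:
# alloc_req = alloc_reqs_by_rp_uuid[host.uuid][0]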
Example #9
    def _execute(self):
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec and
                    self.request_spec.requested_destination and
                        'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props,
                                           self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec and
                self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): In the case that the target host is specified,
            # if the migration fails, it is not necessary to retry
            # the cold migration to the same host. So make sure that
            # reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.scheduler_client.select_destinations(
                    self.context, self.request_spec, [self.instance.uuid],
                    return_objects=True, return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                            elevated, self.reportclient, self.request_spec,
                            self.instance.uuid, alloc_req,
                            selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." %
                          {"instance_uuid": self.instance.uuid})
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(
                self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        LOG.debug("Calling prep_resize with selected host: %s; "
                  "Selected node: %s; Alternates: %s", host, node,
                  self.host_list, instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            self.context, self.instance, legacy_spec['image'],
            self.flavor, host, migration,
            request_spec=legacy_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
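The FIXME above relies on jsonutils.dumps() turning datetime values into strings so that the reloaded legacy dict is acceptable to the RPC fake driver. A small illustration with made-up data:

import datetime

from oslo_serialization import jsonutils

legacy_spec = {'image': {}, 'created_at': datetime.datetime(2020, 1, 1, 12, 0)}
legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))
# legacy_spec['created_at'] is now a string such as '2020-01-01T12:00:00.000000'
# rather than a datetime object.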
Example #10
    def _get_host_supporting_request(self, selection_list):
        """Return the first compute selection from the selection_list where
        the service is new enough to support resource requests during migration
        and the resources are claimed successfully.

        :param selection_list: a list of Selection objects returned by the
            scheduler
        :return: A two tuple. The first item is a Selection object
            representing the host that supports the request. The second item
            is a list of Selection objects representing the remaining alternate
            hosts.
        :raises MaxRetriesExceeded: if none of the hosts in the selection_list
            is new enough to support the request or we cannot claim resources
            on any of the hosts that are new enough.
        """

        if not self.request_spec.requested_resources:
            return selection_list[0], selection_list[1:]

        # Scheduler allocated resources on the first host. So check if the
        # first host is new enough
        if self._support_resource_request(selection_list[0]):
            return selection_list[0], selection_list[1:]

        # First host is old, so we need to use an alternate. Therefore we have
        # to remove the allocation from the first host.
        self.reportclient.delete_allocation_for_instance(
            self.context, self.instance.uuid)
        LOG.debug(
            'Scheduler returned host %(host)s as a possible migration target '
            'but that host is not new enough to support the migration with '
            'resource request %(request)s or the compute RPC is pinned to '
            'less than 5.2. Trying alternate hosts.',
            {'host': selection_list[0].service_host,
             'request': self.request_spec.requested_resources},
            instance=self.instance)

        alternates = selection_list[1:]

        for i, selection in enumerate(alternates):
            if self._support_resource_request(selection):
                # this host is new enough so we need to try to claim resources
                # on it
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(
                        selection.allocation_request)
                    resource_claimed = scheduler_utils.claim_resources(
                        self.context, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)

                    if not resource_claimed:
                        LOG.debug(
                            'Scheduler returned alternate host %(host)s as a '
                            'possible migration target but resource claim '
                            'failed on that host. Trying another alternate.',
                            {'host': selection.service_host},
                            instance=self.instance)
                    else:
                        return selection, alternates[i + 1:]

                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the resources are available.
                    return selection, alternates[i + 1:]

            else:
                LOG.debug(
                    'Scheduler returned alternate host %(host)s as a possible '
                    'migration target but that host is not new enough to '
                    'support the migration with resource request %(request)s '
                    'or the compute RPC is pinned to less than 5.2. '
                    'Trying another alternate.',
                    {'host': selection.service_host,
                     'request': self.request_spec.requested_resources},
                    instance=self.instance)

        # if we reach this point then none of the hosts was new enough for the
        # request or we failed to claim resources on every alternate
        reason = ("Exhausted all hosts available during compute service level "
                  "check for instance %(instance_uuid)s." %
                  {"instance_uuid": self.instance.uuid})
        raise exception.MaxRetriesExceeded(reason=reason)
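As a usage illustration of the two-tuple contract documented above (mocked, since the surrounding MigrationTask is not shown here), the caller unpacks the claimed selection and the remaining alternates in one step:

from unittest import mock

task = mock.Mock()
task._get_host_supporting_request.return_value = (
    mock.sentinel.selection, [mock.sentinel.alt1, mock.sentinel.alt2])

selection, host_list = task._get_host_supporting_request(
    [mock.sentinel.selection, mock.sentinel.alt1, mock.sentinel.alt2])
assert selection is mock.sentinel.selection
assert host_list == [mock.sentinel.alt1, mock.sentinel.alt2]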
Example #11
    def _execute(self):
        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec and
                    self.request_spec.requested_destination and
                        'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props,
                                           self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # TODO(gibi): We need to make sure that the requested_resources field
        # is recalculated based on neutron ports.

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(
            self.context, self.request_spec, self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.query_client.select_destinations(
                    self.context, self.request_spec, [self.instance.uuid],
                    return_objects=True, return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                            elevated, self.reportclient, self.request_spec,
                            self.instance.uuid, alloc_req,
                            selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." %
                          {"instance_uuid": self.instance.uuid})
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(
                self.context, host))

        LOG.debug("Calling prep_resize with selected host: %s; "
                  "Selected node: %s; Alternates: %s", host, node,
                  self.host_list, instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context, self.instance, self.request_spec.image,
            self.flavor, host, migration,
            request_spec=self.request_spec, filter_properties=legacy_props,
            node=node, clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
Example #12
    def _execute(self):
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): In the case that the target host is specified,
            # if the migration fails, it is not necessary to retry
            # the cold migration to the same host. So make sure that
            # reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.scheduler_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        migration,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown,
                                        host_list=self.host_list)
Example #13
    def _schedule(self, context, spec_obj, instance_uuids,
            alloc_reqs_by_rp_uuid, provider_summaries,
            allocation_request_version=None, return_alternates=False):
        """Returns a list of lists of Selection objects.

        :param context: The RequestContext object
        :param spec_obj: The RequestSpec object
        :param instance_uuids: List of instance UUIDs to place or move.
        :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider
                                      UUID, of the allocation_requests that may
                                      be used to claim resources against
                                      matched hosts. If None, indicates either
                                      the placement API wasn't reachable or
                                      that there were no allocation_requests
                                      returned by the placement API. If the
                                      latter, the provider_summaries will be an
                                      empty dict, not None.
        :param provider_summaries: Optional dict, keyed by resource provider
                                   UUID, of information that will be used by
                                   the filters/weighers in selecting matching
                                   hosts for a request. If None, indicates that
                                   the scheduler driver should grab all compute
                                   node information locally and that the
                                   Placement API is not used. If an empty dict,
                                   indicates the Placement API returned no
                                   potential matches for the requested
                                   resources.
        :param allocation_request_version: The microversion used to request the
                                           allocations.
        :param return_alternates: When True, zero or more alternate hosts are
                                  returned with each selected host. The number
                                  of alternates is determined by the
                                  configuration option
                                  `CONF.scheduler.max_attempts`.
        """
        elevated = context.elevated()

        # Find our local list of acceptable hosts by repeatedly
        # filtering and weighing our options. Each time we choose a
        # host, we virtually consume resources on it so subsequent
        # selections can adjust accordingly.

        # Note: remember, we are using a generator-iterator here. So only
        # traverse this list once. This can bite you if the hosts
        # are being scanned in a filter or weighing function.
        hosts = self._get_all_host_states(elevated, spec_obj,
            provider_summaries)

        # NOTE(sbauza): The RequestSpec.num_instances field contains the number
        # of instances created when the RequestSpec was used to first boot some
        # instances. This is incorrect when doing a move or resize operation,
        # so prefer the length of instance_uuids unless it is None.
        num_instances = (len(instance_uuids) if instance_uuids
                         else spec_obj.num_instances)

        # For each requested instance, we want to return a host whose resources
        # for the instance have been claimed, along with zero or more
        # alternates. These alternates will be passed to the cell that the
        # selected host is in, so that if for some reason the build fails, the
        # cell conductor can retry building the instance on one of these
        # alternates instead of having to simply fail. The number of alternates
        # is based on CONF.scheduler.max_attempts; note that if there are not
        # enough filtered hosts to provide the full number of alternates, the
        # list of hosts may be shorter than this amount.
        num_alts = (CONF.scheduler.max_attempts - 1
                    if return_alternates else 0)

        if (instance_uuids is None or
                not self.USES_ALLOCATION_CANDIDATES or
                alloc_reqs_by_rp_uuid is None):
            # We need to support the caching scheduler, which doesn't use the
            # placement API (and has USES_ALLOCATION_CANDIDATES = False) and
            # therefore we skip all the claiming logic for that scheduler
            # driver. Also, if there was a problem communicating with the
            # placement API, alloc_reqs_by_rp_uuid will be None, so we skip
            # claiming in that case as well. In the case where instance_uuids
            # is None, that indicates an older conductor, so we need to return
            # the objects without alternates. They will be converted back to
            # the older dict format representing HostState objects.
            return self._legacy_find_hosts(context, num_instances, spec_obj,
                                           hosts, num_alts)

        # A list of the instance UUIDs that were successfully claimed against
        # in the placement API. If we are not able to successfully claim for
        # all involved instances, we use this list to remove those allocations
        # before returning
        claimed_instance_uuids = []

        # The list of hosts that have been selected (and claimed).
        claimed_hosts = []

        for num in range(num_instances):
            hosts = self._get_sorted_hosts(spec_obj, hosts, num)
            if not hosts:
                # NOTE(jaypipes): If we get here, that means not all instances
                # in instance_uuids were able to be matched to a selected host.
                # Any allocations will be cleaned up in the
                # _ensure_sufficient_hosts() call.
                break

            instance_uuid = instance_uuids[num]
            # Attempt to claim the resources against one or more resource
            # providers, looping over the sorted list of possible hosts
            # looking for an allocation_request that contains that host's
            # resource provider UUID
            claimed_host = None
            for host in hosts:
                cn_uuid = host.uuid
                if cn_uuid not in alloc_reqs_by_rp_uuid:
                    msg = ("A host state with uuid = '%s' that did not have a "
                          "matching allocation_request was encountered while "
                          "scheduling. This host was skipped.")
                    LOG.debug(msg, cn_uuid)
                    continue

                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                # TODO(jaypipes): Loop through all allocation_requests instead
                # of just trying the first one. For now, since we'll likely
                # want to order the allocation_requests in the future based on
                # information in the provider summaries, we'll just try to
                # claim resources using the first allocation_request
                alloc_req = alloc_reqs[0]
                if utils.claim_resources(elevated, self.placement_client,
                        spec_obj, instance_uuid, alloc_req,
                        allocation_request_version=allocation_request_version):
                    claimed_host = host
                    break

            if claimed_host is None:
                # We weren't able to claim resources in the placement API
                # for any of the sorted hosts identified. So, clean up any
                # successfully-claimed resources for prior instances in
                # this request and return an empty list which will cause
                # select_destinations() to raise NoValidHost
                LOG.debug("Unable to successfully claim against any host.")
                break

            claimed_instance_uuids.append(instance_uuid)
            claimed_hosts.append(claimed_host)

            # Now consume the resources so the filter/weights will change for
            # the next instance.
            self._consume_selected_host(claimed_host, spec_obj)

        # Check if we were able to fulfill the request. If not, this call will
        # raise a NoValidHost exception.
        self._ensure_sufficient_hosts(context, claimed_hosts, num_instances,
                claimed_instance_uuids)

        # We have selected and claimed hosts for each instance. Now we need to
        # find alternates for each host.
        selections_to_return = self._get_alternate_hosts(
            claimed_hosts, spec_obj, hosts, num, num_alts,
            alloc_reqs_by_rp_uuid, allocation_request_version)
        return selections_to_return
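A worked example of the alternate-count arithmetic used by both _schedule() variants, assuming the default of 3 for the CONF.scheduler.max_attempts option (the value is configurable):

max_attempts = 3          # assumed default of CONF.scheduler.max_attempts
return_alternates = True
num_alts = (max_attempts - 1) if return_alternates else 0
assert num_alts == 2
# Each inner list returned is then [selected_host] plus up to 2 alternates,
# possibly fewer if there were not enough filtered hosts.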
Example #14
    def _execute(self):
        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # TODO(gibi): We need to make sure that the requested_resources field
        # is recalculated based on neutron ports.

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.query_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context,
            self.instance,
            self.request_spec.image,
            self.flavor,
            host,
            migration,
            request_spec=self.request_spec,
            filter_properties=legacy_props,
            node=node,
            clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)