Example #1
File: utils.py  Project: panguan737/nova
def populate_retry(filter_properties, instance_uuid):
    max_attempts = CONF.scheduler.max_attempts
    force_hosts = filter_properties.get('force_hosts', [])
    force_nodes = filter_properties.get('force_nodes', [])

    # In the case of multiple force hosts/nodes, scheduler should not
    # disable retry filter but traverse all force hosts/nodes one by
    # one till scheduler gets a valid target host.
    if (max_attempts == 1 or len(force_hosts) == 1 or len(force_nodes) == 1):
        # re-scheduling is disabled.
        return

    # retry is enabled, update attempt count:
    retry = filter_properties.setdefault(
        'retry',
        {
            'num_attempts': 0,
            'hosts': []  # list of compute hosts tried
        })
    retry['num_attempts'] += 1

    _log_compute_error(instance_uuid, retry)
    exc_reason = retry.pop('exc_reason', None)

    if retry['num_attempts'] > max_attempts:
        msg = (_('Exceeded max scheduling attempts %(max_attempts)d '
                 'for instance %(instance_uuid)s. '
                 'Last exception: %(exc_reason)s') % {
                     'max_attempts': max_attempts,
                     'instance_uuid': instance_uuid,
                     'exc_reason': exc_reason
                 })
        raise exception.MaxRetriesExceeded(reason=msg)
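To see what this does to the filter properties across reschedules, here is a minimal standalone sketch of the same bookkeeping. CONF.scheduler.max_attempts, the _ translation helper, _log_compute_error and nova's exception module are replaced with simple stand-ins, so this is an illustration of the pattern rather than nova code.

class MaxRetriesExceeded(Exception):
    """Stand-in for nova's exception.MaxRetriesExceeded."""


MAX_ATTEMPTS = 3  # stand-in for CONF.scheduler.max_attempts


def populate_retry_sketch(filter_properties, instance_uuid):
    force_hosts = filter_properties.get('force_hosts', [])
    force_nodes = filter_properties.get('force_nodes', [])
    if MAX_ATTEMPTS == 1 or len(force_hosts) == 1 or len(force_nodes) == 1:
        return  # re-scheduling is disabled

    # the same dict is passed back in on every reschedule, so the
    # counter accumulates across attempts
    retry = filter_properties.setdefault(
        'retry', {'num_attempts': 0, 'hosts': []})
    retry['num_attempts'] += 1
    if retry['num_attempts'] > MAX_ATTEMPTS:
        raise MaxRetriesExceeded(
            'Exceeded max scheduling attempts %d for instance %s'
            % (MAX_ATTEMPTS, instance_uuid))


props = {}
for _attempt in range(3):
    populate_retry_sketch(props, 'fake-uuid')
print(props['retry']['num_attempts'])  # 3; a fourth call would raise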
Example #2
    def _check_not_over_max_retries(self, attempted_hosts):
        if CONF.migrate_max_retries == -1:
            return

        retries = len(attempted_hosts) - 1
        if retries > CONF.migrate_max_retries:
            msg = (_('Exceeded max scheduling retries %(max_retries)d for '
                     'instance %(instance_uuid)s during live migration')
                   % {'max_retries': retries,
                      'instance_uuid': self.instance.uuid})
            raise exception.MaxRetriesExceeded(reason=msg)
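The -1 in the retry count reflects that the first entry in attempted_hosts is the source host itself, so only the later entries count as scheduling retries. A minimal standalone sketch (not nova code), with migrate_max_retries stubbed as a plain value, shows the accounting:

migrate_max_retries = 2  # stand-in for CONF.migrate_max_retries


def over_max_retries(attempted_hosts):
    if migrate_max_retries == -1:  # -1 means retry without limit
        return False
    # the source host does not count as a retry, hence the -1
    return len(attempted_hosts) - 1 > migrate_max_retries


print(over_max_retries(['source']))                             # False, 0 retries
print(over_max_retries(['source', 'dest1', 'dest2']))           # False, 2 retries
print(over_max_retries(['source', 'dest1', 'dest2', 'dest3']))  # True, 3 retries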
Example #3
    def _reschedule(self):
        # Since the resources on these alternates may have been consumed and
        # might not be able to support the migrated instance, we need to first
        # claim the resources to verify the host still has sufficient
        # available resources.
        elevated = self.context.elevated()
        host_available = False
        selection = None
        while self.host_list and not host_available:
            selection = self.host_list.pop(0)
            if (self.request_spec.requested_resources
                    and not self._support_resource_request(selection)):
                LOG.debug(
                    'Scheduler returned alternate host %(host)s as a possible '
                    'migration target for re-schedule but that host is not '
                    'new enough to support the migration with resource '
                    'request %(request)s. Trying another alternate.', {
                        'host': selection.service_host,
                        'request': self.request_spec.requested_resources
                    },
                    instance=self.instance)
                continue
            if selection.allocation_request:
                alloc_req = jsonutils.loads(selection.allocation_request)
            else:
                alloc_req = None
            if alloc_req:
                # If this call succeeds, the resources on the destination
                # host will be claimed by the instance.
                host_available = scheduler_utils.claim_resources(
                    elevated, self.reportclient, self.request_spec,
                    self.instance.uuid, alloc_req,
                    selection.allocation_request_version)
                if host_available:
                    scheduler_utils.fill_provider_mapping(
                        self.context, self.reportclient, self.request_spec,
                        selection)
            else:
                # Some deployments use different schedulers that do not
                # use Placement, so they will not have an
                # allocation_request to claim with. For those cases,
                # there is no concept of claiming, so just assume that
                # the host is valid.
                host_available = True
        # There are no more available hosts. Raise a MaxRetriesExceeded
        # exception in that case.
        if not host_available:
            reason = ("Exhausted all hosts available for retrying build "
                      "failures for instance %(instance_uuid)s." % {
                          "instance_uuid": self.instance.uuid
                      })
            raise exception.MaxRetriesExceeded(reason=reason)
        return selection
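The core of this method is the pop-claim-fall-back loop: take the next alternate, try to claim it, and raise once the list is exhausted. The following standalone sketch mirrors that loop with simplified stand-ins for Selection, claim_resources and the exception, so it can be run on its own; it is an illustration of the pattern, not nova code.

import collections

Selection = collections.namedtuple(
    'Selection', ['service_host', 'allocation_request'])


class MaxRetriesExceeded(Exception):
    """Stand-in for nova's exception.MaxRetriesExceeded."""


def claim_resources(selection):
    # pretend only host 'c2' still has free capacity
    return selection.service_host == 'c2'


def reschedule(host_list):
    selection = None
    host_available = False
    while host_list and not host_available:
        selection = host_list.pop(0)
        if selection.allocation_request:
            # with a Placement-style allocation request, try to claim it
            host_available = claim_resources(selection)
        else:
            # no allocation request -> nothing to claim, accept the host
            host_available = True
    if not host_available:
        raise MaxRetriesExceeded('Exhausted all alternate hosts')
    return selection


alternates = [Selection('c1', '{"allocations": {}}'),
              Selection('c2', '{"allocations": {}}')]
print(reschedule(alternates).service_host)  # c2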
Example #4
    def _get_host_supporting_request(self, selection_list):
        """Return the first compute selection from the selection_list where
        the service is new enough to support resource request during migration
        and the resources claimed successfully.

        :param selection_list: a list of Selection objects returned by the
            scheduler
        :return: A two tuple. The first item is a Selection object
            representing the host that supports the request. The second item
            is a list of Selection objects representing the remaining alternate
            hosts.
        :raises MaxRetriesExceeded: if none of the hosts in the selection_list
            is new enough to support the request or we cannot claim resource
            on any of the hosts that are new enough.
        """

        if not self.request_spec.requested_resources:
            return selection_list[0], selection_list[1:]

        # Scheduler allocated resources on the first host. So check if the
        # first host is new enough
        if self._support_resource_request(selection_list[0]):
            return selection_list[0], selection_list[1:]

        # First host is old, so we need to use an alternate. Therefore we have
        # to remove the allocation from the first host.
        self.reportclient.delete_allocation_for_instance(
            self.context, self.instance.uuid)
        LOG.debug(
            'Scheduler returned host %(host)s as a possible migration target '
            'but that host is not new enough to support the migration with '
            'resource request %(request)s or the compute RPC is pinned to '
            'less than 5.2. Trying alternate hosts.',
            {'host': selection_list[0].service_host,
             'request': self.request_spec.requested_resources},
            instance=self.instance)

        alternates = selection_list[1:]

        for i, selection in enumerate(alternates):
            if self._support_resource_request(selection):
                # this host is new enough so we need to try to claim resources
                # on it
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(
                        selection.allocation_request)
                    resource_claimed = scheduler_utils.claim_resources(
                        self.context, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)

                    if not resource_claimed:
                        LOG.debug(
                            'Scheduler returned alternate host %(host)s as a '
                            'possible migration target but resource claim '
                            'failed on that host. Trying another alternate.',
                            {'host': selection.service_host},
                            instance=self.instance)
                    else:
                        return selection, alternates[i + 1:]

                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the resources are available.
                    return selection, alternates[i + 1:]

            else:
                LOG.debug(
                    'Scheduler returned alternate host %(host)s as a possible '
                    'migration target but that host is not new enough to '
                    'support the migration with resource request %(request)s '
                    'or the compute RPC is pinned to less than 5.2. '
                    'Trying another alternate.',
                    {'host': selection.service_host,
                     'request': self.request_spec.requested_resources},
                    instance=self.instance)

        # if we reach this point then none of the hosts was new enough for the
        # request or we failed to claim resources on every alternate
        reason = ("Exhausted all hosts available during compute service level "
                  "check for instance %(instance_uuid)s." %
                  {"instance_uuid": self.instance.uuid})
        raise exception.MaxRetriesExceeded(reason=reason)
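A detail worth noting is the return shape: the chosen Selection comes back together with alternates[i + 1:], so a later reschedule only ever sees hosts that have not been tried yet. Below is a small standalone sketch of that slicing pattern, with a hypothetical supports() predicate standing in for _support_resource_request and the claim logic; it is not nova code.

def first_supporting(selection_list, supports):
    if supports(selection_list[0]):
        return selection_list[0], selection_list[1:]
    alternates = selection_list[1:]
    for i, selection in enumerate(alternates):
        if supports(selection):
            # hand back only the hosts that come after the chosen one
            return selection, alternates[i + 1:]
    raise RuntimeError('no host in the selection list supports the request')


hosts = ['old1', 'old2', 'new1', 'new2']
chosen, remaining = first_supporting(hosts, lambda h: h.startswith('new'))
print(chosen, remaining)  # new1 ['new2']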
Example #5
    def _execute(self):
        # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
        # object in the signature and all the scheduler.utils methods too
        legacy_spec = self.request_spec.to_legacy_request_spec_dict()
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # NOTE(danms): Right now we only support migrate to the same
        # cell as the current instance, so request that the scheduler
        # limit thusly.
        instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)
        LOG.debug('Requesting cell %(cell)s while migrating',
                  {'cell': instance_mapping.cell_mapping.identity},
                  instance=self.instance)
        if ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination):
            self.request_spec.requested_destination.cell = (
                instance_mapping.cell_mapping)
            # NOTE(takashin): In the case that the target host is specified,
            # if the migration fails, it is not necessary to retry
            # the cold migration to the same host. So make sure that
            # a reschedule will not occur.
            if 'host' in self.request_spec.requested_destination:
                legacy_props.pop('retry', None)
                self.request_spec.retry = None
        else:
            self.request_spec.requested_destination = objects.Destination(
                cell=instance_mapping.cell_mapping)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.scheduler_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        # FIXME(sbauza): Serialize/Unserialize the legacy dict because of
        # oslo.messaging #1529084 to transform datetime values into strings.
        # tl;dr: datetimes in dicts are not accepted as correct values by the
        # rpc fake driver.
        legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(self.context,
                                        self.instance,
                                        legacy_spec['image'],
                                        self.flavor,
                                        host,
                                        migration,
                                        request_spec=legacy_spec,
                                        filter_properties=legacy_props,
                                        node=node,
                                        clean_shutdown=self.clean_shutdown,
                                        host_list=self.host_list)
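The FIXME above works around datetimes inside the legacy dict not being accepted by the RPC fake driver, so the dict is serialized and deserialized once to turn them into strings. Nova does this with oslo.serialization's jsonutils; the standalone sketch below uses the standard library with default=str, which is an assumption made here only to show the same round-trip idea.

import datetime
import json

legacy_spec = {'image': {'id': 'fake-image'},
               'created_at': datetime.datetime(2020, 1, 1, 12, 0, 0)}

# serialize and deserialize once so the datetime becomes a plain string
round_tripped = json.loads(json.dumps(legacy_spec, default=str))
print(round_tripped['created_at'])  # '2020-01-01 12:00:00'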
Example #6
    def _execute(self):
        # TODO(sbauza): Remove once all the scheduler.utils methods accept a
        # RequestSpec object in the signature.
        legacy_props = self.request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.setup_instance_group(self.context, self.request_spec)
        # If a target host is set in a requested destination,
        # 'populate_retry' need not be executed.
        if not ('requested_destination' in self.request_spec
                and self.request_spec.requested_destination
                and 'host' in self.request_spec.requested_destination):
            scheduler_utils.populate_retry(legacy_props, self.instance.uuid)

        # NOTE(sbauza): Force_hosts/nodes needs to be reset
        # if we want to make sure that the next destination
        # is not forced to be the original host
        self.request_spec.reset_forced_destinations()

        # TODO(gibi): We need to make sure that the requested_resources field
        # is re calculated based on neutron ports.

        self._restrict_request_spec_to_cell(legacy_props)

        # Once _preallocate_migration() is done, the source node allocation is
        # moved from the instance consumer to the migration record consumer,
        # and the instance consumer doesn't have any allocations. If this is
        # the first time through here (not a reschedule), select_destinations
        # below will allocate resources on the selected destination node for
        # the instance consumer. If we're rescheduling, host_list is not None
        # and we'll call claim_resources for the instance and the selected
        # alternate. If we exhaust our alternates and raise MaxRetriesExceeded,
        # the rollback() method should revert the allocation swaparoo and move
        # the source node allocation from the migration record back to the
        # instance record.
        migration = self._preallocate_migration()

        self.request_spec.ensure_project_and_user_id(self.instance)
        self.request_spec.ensure_network_metadata(self.instance)
        compute_utils.heal_reqspec_is_bfv(self.context, self.request_spec,
                                          self.instance)
        # On an initial call to migrate, 'self.host_list' will be None, so we
        # have to call the scheduler to get a list of acceptable hosts to
        # migrate to. That list will consist of a selected host, along with
        # zero or more alternates. On a reschedule, though, the alternates will
        # be passed to this object and stored in 'self.host_list', so we can
        # pop the first alternate from the list to use for the destination, and
        # pass the remaining alternates to the compute.
        if self.host_list is None:
            selection_lists = self.query_client.select_destinations(
                self.context,
                self.request_spec, [self.instance.uuid],
                return_objects=True,
                return_alternates=True)
            # Since there is only ever one instance to migrate per call, we
            # just need the first returned element.
            selection_list = selection_lists[0]
            # The selected host is the first item in the list, with the
            # alternates being the remainder of the list.
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # This is a reschedule that will use the supplied alternate hosts
            # in the host_list as destinations. Since the resources on these
            # alternates may have been consumed and might not be able to
            # support the migrated instance, we need to first claim the
            # resources to verify the host still has sufficient available
            # resources.
            elevated = self.context.elevated()
            host_available = False
            while self.host_list and not host_available:
                selection = self.host_list.pop(0)
                if selection.allocation_request:
                    alloc_req = jsonutils.loads(selection.allocation_request)
                else:
                    alloc_req = None
                if alloc_req:
                    # If this call succeeds, the resources on the destination
                    # host will be claimed by the instance.
                    host_available = scheduler_utils.claim_resources(
                        elevated, self.reportclient, self.request_spec,
                        self.instance.uuid, alloc_req,
                        selection.allocation_request_version)
                else:
                    # Some deployments use different schedulers that do not
                    # use Placement, so they will not have an
                    # allocation_request to claim with. For those cases,
                    # there is no concept of claiming, so just assume that
                    # the host is valid.
                    host_available = True
            # There are no more available hosts. Raise a MaxRetriesExceeded
            # exception in that case.
            if not host_available:
                reason = ("Exhausted all hosts available for retrying build "
                          "failures for instance %(instance_uuid)s." % {
                              "instance_uuid": self.instance.uuid
                          })
                raise exception.MaxRetriesExceeded(reason=reason)

        scheduler_utils.populate_filter_properties(legacy_props, selection)
        # context is not serializable
        legacy_props.pop('context', None)

        (host, node) = (selection.service_host, selection.nodename)

        self.instance.availability_zone = (
            availability_zones.get_host_availability_zone(self.context, host))

        LOG.debug(
            "Calling prep_resize with selected host: %s; "
            "Selected node: %s; Alternates: %s",
            host,
            node,
            self.host_list,
            instance=self.instance)
        # RPC cast to the destination host to start the migration process.
        self.compute_rpcapi.prep_resize(
            # NOTE(mriedem): Using request_spec.image here is potentially
            # dangerous if it is not kept up to date (i.e. rebuild/unshelve);
            # seems like the sane thing to do would be to pass the current
            # instance.image_meta since that is what MoveClaim will use for
            # any NUMA topology claims on the destination host...
            self.context,
            self.instance,
            self.request_spec.image,
            self.flavor,
            host,
            migration,
            request_spec=self.request_spec,
            filter_properties=legacy_props,
            node=node,
            clean_shutdown=self.clean_shutdown,
            host_list=self.host_list)
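Both _execute variants hinge on the same state: host_list is None on the first call, so the scheduler is queried and the first Selection becomes the destination while the rest are stored as alternates; on a reschedule the stored alternates are consumed instead. Here is a minimal standalone sketch of that flow, with fake_select_destinations standing in for the scheduler query client; it illustrates the control flow only and is not nova code.

def fake_select_destinations():
    # one instance per call -> one inner selection list
    return [['host-a', 'host-b', 'host-c']]


class MigrateSketch:
    def __init__(self):
        self.host_list = None  # None means "first call", not "no alternates"

    def pick_destination(self):
        if self.host_list is None:
            # first call: ask the scheduler, keep the tail as alternates
            selection_list = fake_select_destinations()[0]
            selection, self.host_list = selection_list[0], selection_list[1:]
        else:
            # reschedule: consume the stored alternates instead
            selection = self.host_list.pop(0)
        return selection


task = MigrateSketch()
print(task.pick_destination(), task.host_list)  # host-a ['host-b', 'host-c']
print(task.pick_destination(), task.host_list)  # host-b ['host-c']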