Example #1
    def test_find_destination_works_with_no_request_spec(self):
        task = live_migrate.LiveMigrationTask(
            self.context, self.instance, self.destination,
            self.block_migration, self.disk_over_commit, self.migration,
            compute_rpcapi.ComputeAPI(), servicegroup.API(),
            scheduler_client.SchedulerClient(), request_spec=None)
        another_spec = objects.RequestSpec()
        self.instance.flavor = objects.Flavor()
        self.instance.numa_topology = None
        self.instance.pci_requests = None

        @mock.patch.object(task, '_call_livem_checks_on_host')
        @mock.patch.object(task, '_check_compatible_with_source_hypervisor')
        @mock.patch.object(task.scheduler_client, 'select_destinations')
        @mock.patch.object(objects.RequestSpec, 'from_components')
        @mock.patch.object(scheduler_utils, 'setup_instance_group')
        @mock.patch.object(utils, 'get_image_from_system_metadata')
        def do_test(get_image, setup_ig, from_components, select_dest,
                    check_compat, call_livem_checks):
            get_image.return_value = "image"
            from_components.return_value = another_spec
            select_dest.return_value = [[fake_selection1]]

            self.assertEqual(("host1", "node1"), task._find_destination())

            get_image.assert_called_once_with(self.instance.system_metadata)
            setup_ig.assert_called_once_with(self.context, another_spec)
            select_dest.assert_called_once_with(self.context, another_spec,
                    [self.instance.uuid], return_objects=True,
                    return_alternates=False)
            # Make sure the request_spec was updated to include the cell
            # mapping.
            self.assertIsNotNone(another_spec.requested_destination.cell)
            check_compat.assert_called_once_with("host1")
            call_livem_checks.assert_called_once_with("host1")
        do_test()
Example #2
def revert_allocation_for_migration(context, source_cn, instance, migration):
    """Revert an allocation made for a migration back to the instance."""

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.move_allocations(context, migration.uuid,
                                            instance.uuid)
    if not success:
        LOG.error(
            'Unable to replace resource claim on source '
            'host %(host)s node %(node)s for instance', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
    else:
        LOG.debug('Created allocations for instance %(inst)s on %(rp)s', {
            'inst': instance.uuid,
            'rp': source_cn.uuid
        })
Example #3
    def __init__(self, *args, **kwargs):
        super(FilterScheduler, self).__init__(*args, **kwargs)
        self.notifier = rpc.get_notifier('scheduler')
        scheduler_client = client.SchedulerClient()
        self.placement_client = scheduler_client.reportclient
Example #4
    def __init__(self):
        super(ComputeTaskManager, self).__init__()
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.image_api = image.API()
        self.scheduler_client = scheduler_client.SchedulerClient()
Example #5
    def setUp(self):
        super(SchedulerClientTestCase, self).setUp()
        self.client = scheduler_client.SchedulerClient()
Example #6
    def _generate_task(self):
        self.task = live_migrate.LiveMigrationTask(
            self.context, self.instance, self.destination,
            self.block_migration, self.disk_over_commit, self.migration,
            compute_rpcapi.ComputeAPI(), servicegroup.API(),
            scheduler_client.SchedulerClient(), self.fake_spec)
Example #7
def revert_allocation_for_migration(source_cn, instance, migration,
                                    orig_alloc):
    """Revert an allocation made for a migration back to the instance."""

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    # FIXME(danms): Since we don't have an atomic operation to adjust
    # allocations for multiple consumers, we have to have space on the
    # source for double the claim before we delete the old one
    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.put_allocations(source_cn.uuid, instance.uuid,
                                           orig_alloc, instance.project_id,
                                           instance.user_id)
    if not success:
        LOG.error(
            'Unable to replace resource claim on source '
            'host %(host)s node %(node)s for instance', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
    else:
        LOG.debug('Created allocations for instance %(inst)s on %(rp)s', {
            'inst': instance.uuid,
            'rp': source_cn.uuid
        })

    reportclient.delete_allocation_for_instance(migration.uuid)

    # TODO(danms): Remove this late retry logic when we can replace
    # the above two-step process with a single atomic one. Until then,
    # we just re-attempt the claim for the instance now that we have
    # cleared what should be an equal amount of space by deleting the
    # holding migration.

    if not success:
        # NOTE(danms): We failed to claim the resources for the
        # instance above before the delete of the migration's
        # claim. Try again to claim for the instance. This is just
        # a racy attempt to be atomic and avoid stranding this
        # instance without an allocation. When we have an atomic
        # replace operation we should remove this.
        success = reportclient.put_allocations(source_cn.uuid, instance.uuid,
                                               orig_alloc, instance.project_id,
                                               instance.user_id)
        if success:
            LOG.debug(
                'Created allocations for instance %(inst)s on %(rp)s '
                '(retried)', {
                    'inst': instance.uuid,
                    'rp': source_cn.uuid
                })
        else:
            LOG.error(
                'Unable to replace resource claim on source '
                'host %(host)s node %(node)s for instance (retried)', {
                    'host': instance.host,
                    'node': instance.node
                },
                instance=instance)
Example #8
def replace_allocation_with_migration(context, instance, migration):
    """Replace instance's allocation with one for a migration.

    :returns: (source_compute_node, migration_allocation)
    """
    try:
        source_cn = objects.ComputeNode.get_by_host_and_nodename(
            context, instance.host, instance.node)
    except exception.ComputeHostNotFound:
        LOG.error(
            'Unable to find record for source '
            'node %(node)s on %(host)s', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    orig_alloc = reportclient.get_allocations_for_consumer_by_provider(
        source_cn.uuid, instance.uuid)
    if not orig_alloc:
        LOG.error('Unable to find existing allocations for instance',
                  instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise exception.InstanceUnacceptable(
            instance_id=instance.uuid,
            reason=_('Instance has no source node allocation'))

    # FIXME(danms): Since we don't have an atomic operation to adjust
    # allocations for multiple consumers, we have to have space on the
    # source for double the claim before we delete the old one
    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.put_allocations(source_cn.uuid, migration.uuid,
                                           orig_alloc, instance.project_id,
                                           instance.user_id)
    if not success:
        LOG.error(
            'Unable to replace resource claim on source '
            'host %(host)s node %(node)s for instance', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
        # Mimic the "no space" error that could have come from the
        # scheduler. Once we have an atomic replace operation, this
        # would be a severe error.
        raise exception.NoValidHost(
            reason=_('Unable to replace instance claim on source'))
    else:
        LOG.debug('Created allocations for migration %(mig)s on %(rp)s', {
            'mig': migration.uuid,
            'rp': source_cn.uuid
        })

    reportclient.delete_allocation_for_instance(instance.uuid)

    return source_cn, orig_alloc
Example #9
    def _generate_task(self):
        return migrate.MigrationTask(
            self.context, self.instance, self.flavor,
            self.filter_properties, self.request_spec,
            self.reservations, self.clean_shutdown,
            compute_rpcapi.ComputeAPI(),
            scheduler_client.SchedulerClient())
Example #10
def allocation_sync(cxt):
    hm = host_manager.HostManager()
    states = hm.get_all_host_states(cxt)
    compute_api = compute.API()
    node_vm_map = {}
    reportclient = scheduler_client.SchedulerClient().reportclient
    now = datetime.now()
    for state in states:
        rp_uuid = state.uuid
        rp = rp_obj.ResourceProvider.get_by_uuid(cxt, rp_uuid)

        # NOTE(fanzhang): Construct a mapping from node name to the set of
        # instance UUIDs running on that node
        node_vm_map.setdefault(rp.name, set())
        for instance_uuid in state.instances:
            instance_obj = state.instances[instance_uuid]
            node_name = instance_obj.node
            node_vm_map.setdefault(node_name, set())
            node_vm_map[node_name].add(instance_uuid)
            LOG.debug("Instance uuid is %s", instance_uuid)
        vms_in_node = node_vm_map[rp.name]

        allocations_list = rp_obj.AllocationList.\
            get_all_by_resource_provider(cxt, rp)
        LOG.debug('AllocationList is %s', allocations_list)
        vms_in_allocation = set(map(lambda x: x.consumer_id, allocations_list))

        if vms_in_node != vms_in_allocation:
            LOG.warn('Instances on node %s do not match allocations %s',
                     vms_in_node, vms_in_allocation)

        # NOTE(fanzhang): Delete allocations for VMs that are no longer on
        # the compute node
        allocations_more = vms_in_allocation - vms_in_node
        if allocations_more:
            LOG.warn(
                'Allocations exist for instances not on the node: %s',
                allocations_more)
            for allocation in allocations_list:
                if allocation.consumer_id in allocations_more:
                    allocs = rp_obj.AllocationList.get_all_by_consumer_id(
                        cxt, consumer_id=allocation.consumer_id)
                    created_at = allocation.created_at.replace(tzinfo=None)
                    delta = (now - created_at).total_seconds()
                    if delta >= 1800:
                        LOG.info('Try to delete %s', allocation)
                        LOG.debug('Allocations by consumer id are %s', allocs)
                        # log_redo_sql(allocs, allocation.id)
                        allocation.destroy()
                    else:
                        LOG.info(
                            'allocation %s was created within the last '
                            '30 minutes, skipping', allocation)

        # NOTE(fanzhang): Create allocations for vms on compute nodes without
        # allocation records.
        host_manager_more = vms_in_node - vms_in_allocation
        if host_manager_more:
            LOG.warn(
                'Instances exist on the node without allocation records: %s',
                host_manager_more)
            for instance_uuid in host_manager_more:
                instance = compute_api.get(cxt, instance_uuid)
                LOG.debug(instance)
                LOG.warn(
                    'Should create allocation record with '
                    'resource provider uuid %s and consumer id %s',
                    rp_uuid, instance.uuid)
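
A minimal invocation sketch, not part of the original snippet: allocation_sync() expects an admin RequestContext, assuming nova's config and objects have already been set up (as in Example #13 below).

from nova import context as nova_context

# allocation_sync() walks every host state, so run it with an admin context.
allocation_sync(nova_context.get_admin_context())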
Example #11
    def __init__(self):
        super(ComputeTaskManager, self).__init__()
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.image_api = image.API()
        self.scheduler_client = scheduler_client.SchedulerClient()
        self.notifier = rpc.get_notifier('compute', CONF.host)
Example #12
def replace_allocation_with_migration(context, instance, migration):
    """Replace instance's allocation with one for a migration.

    :raises: keystoneauth1.exceptions.base.ClientException on failure to
             communicate with the placement API
    :raises: ConsumerAllocationRetrievalFailed if reading the current
             allocation from placement fails
    :raises: ComputeHostNotFound if the host of the instance is not found in
             the database
    :raises: AllocationMoveFailed if moving the allocation from the
             instance.uuid to the migration.uuid fails due to parallel
             placement operation on the instance consumer
    :raises: NoValidHost if placement rejects the update for other reasons
             (e.g. not enough resources)
    :returns: (source_compute_node, migration_allocation)
    """
    try:
        source_cn = objects.ComputeNode.get_by_host_and_nodename(
            context, instance.host, instance.node)
    except exception.ComputeHostNotFound:
        LOG.error(
            'Unable to find record for source '
            'node %(node)s on %(host)s', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    orig_alloc = reportclient.get_allocs_for_consumer(
        context, instance.uuid)['allocations']
    root_alloc = orig_alloc.get(source_cn.uuid, {}).get('resources', {})
    if not root_alloc:
        LOG.debug(
            'Unable to find existing allocations for instance on '
            'source compute node: %s. This is normal if you are not '
            'using the FilterScheduler.',
            source_cn.uuid,
            instance=instance)
        return None, None

    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.move_allocations(context, instance.uuid,
                                            migration.uuid)
    if not success:
        LOG.error(
            'Unable to replace resource claim on source '
            'host %(host)s node %(node)s for instance', {
                'host': instance.host,
                'node': instance.node
            },
            instance=instance)
        # Mimic the "no space" error that could have come from the
        # scheduler. Once we have an atomic replace operation, this
        # would be a severe error.
        raise exception.NoValidHost(
            reason=_('Unable to replace instance claim on source'))
    else:
        LOG.debug('Created allocations for migration %(mig)s on %(rp)s', {
            'mig': migration.uuid,
            'rp': source_cn.uuid
        })

    return source_cn, orig_alloc
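
A hedged usage sketch, not taken from the source: this function is typically paired with revert_allocation_for_migration() (see Example #2) so a failed move can restore the instance's claim. The control flow below is an assumption for illustration, not nova's actual conductor code.

source_cn, orig_alloc = replace_allocation_with_migration(
    context, instance, migration)
try:
    # ... schedule and perform the move ...
    pass
except Exception:
    # Put the allocation back on the instance if anything goes wrong.
    revert_allocation_for_migration(context, source_cn, instance, migration)
    raise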
Example #13
from oslo_log import log as logging
from oslo_utils import importutils
import nova.conf
from nova import config
from nova import objects
from nova import context
from nova.scheduler import utils
from nova.scheduler import client as scheduler_client

CONF = nova.conf.CONF

logging.setup(CONF, 'nova')
LOG = logging.getLogger(__name__)

argv = []
default_config_files = ['/etc/nova/nova.conf']
config.parse_args(argv, default_config_files=default_config_files)
objects.register_all()
context = context.get_admin_context()

client = scheduler_client.SchedulerClient()
placement_client = client.reportclient

instance_uuid = 'fdc43c5c-49e1-448b-8cb1-c0d73030697f'
request_spec = objects.RequestSpec.get_by_instance_uuid(context, instance_uuid)
resources = utils.resources_from_request_spec(request_spec)
res = placement_client.get_allocation_candidates(resources)
alloc_reqs, provider_summaries = res
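
A hedged follow-up, not in the original script: the two results can be dumped for inspection. Treat the exact payload layout as an assumption, since it varies with the placement API microversion in use.

# alloc_reqs is a list of candidate allocation requests; provider_summaries
# maps resource provider UUIDs to their inventory/usage summaries.
for req in alloc_reqs:
    LOG.debug('Allocation request: %s', req)
for rp_uuid, summary in provider_summaries.items():
    LOG.debug('Provider %s summary: %s', rp_uuid, summary)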