def test_find_destination_works_with_no_request_spec(self):
    task = live_migrate.LiveMigrationTask(
        self.context, self.instance, self.destination,
        self.block_migration, self.disk_over_commit, self.migration,
        compute_rpcapi.ComputeAPI(), servicegroup.API(),
        scheduler_client.SchedulerClient(), request_spec=None)
    another_spec = objects.RequestSpec()
    self.instance.flavor = objects.Flavor()
    self.instance.numa_topology = None
    self.instance.pci_requests = None

    @mock.patch.object(task, '_call_livem_checks_on_host')
    @mock.patch.object(task, '_check_compatible_with_source_hypervisor')
    @mock.patch.object(task.scheduler_client, 'select_destinations')
    @mock.patch.object(objects.RequestSpec, 'from_components')
    @mock.patch.object(scheduler_utils, 'setup_instance_group')
    @mock.patch.object(utils, 'get_image_from_system_metadata')
    def do_test(get_image, setup_ig, from_components, select_dest,
                check_compat, call_livem_checks):
        get_image.return_value = "image"
        from_components.return_value = another_spec
        select_dest.return_value = [[fake_selection1]]

        self.assertEqual(("host1", "node1"), task._find_destination())

        get_image.assert_called_once_with(self.instance.system_metadata)
        setup_ig.assert_called_once_with(self.context, another_spec)
        select_dest.assert_called_once_with(
            self.context, another_spec, [self.instance.uuid],
            return_objects=True, return_alternates=False)
        # Make sure the request_spec was updated to include the cell
        # mapping.
        self.assertIsNotNone(another_spec.requested_destination.cell)
        check_compat.assert_called_once_with("host1")
        call_livem_checks.assert_called_once_with("host1")

    do_test()
def revert_allocation_for_migration(context, source_cn, instance, migration):
    """Revert an allocation made for a migration back to the instance."""
    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.move_allocations(context, migration.uuid,
                                            instance.uuid)
    if not success:
        LOG.error('Unable to replace resource claim on source '
                  'host %(host)s node %(node)s for instance',
                  {'host': instance.host,
                   'node': instance.node},
                  instance=instance)
    else:
        LOG.debug('Created allocations for instance %(inst)s on %(rp)s',
                  {'inst': instance.uuid,
                   'rp': source_cn.uuid})
def __init__(self, *args, **kwargs):
    super(FilterScheduler, self).__init__(*args, **kwargs)
    self.notifier = rpc.get_notifier('scheduler')
    scheduler_client = client.SchedulerClient()
    self.placement_client = scheduler_client.reportclient
def __init__(self):
    super(ComputeTaskManager, self).__init__()
    self.compute_rpcapi = compute_rpcapi.ComputeAPI()
    self.image_api = image.API()
    self.scheduler_client = scheduler_client.SchedulerClient()
def setUp(self):
    super(SchedulerClientTestCase, self).setUp()
    self.client = scheduler_client.SchedulerClient()
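A hedged sketch of what a test built on this setUp might assert, using only the attributes of SchedulerClient that appear elsewhere in these snippets (reportclient and select_destinations); the test name and assertions are assumptions, not from the source.

def test_client_exposes_report_and_select(self):
    # Assumed sketch: the client built in setUp exposes the placement report
    # client and the select_destinations call used by the other snippets.
    self.assertIsNotNone(self.client.reportclient)
    self.assertTrue(callable(self.client.select_destinations))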
def _generate_task(self):
    self.task = live_migrate.LiveMigrationTask(
        self.context, self.instance, self.destination,
        self.block_migration, self.disk_over_commit, self.migration,
        compute_rpcapi.ComputeAPI(), servicegroup.API(),
        scheduler_client.SchedulerClient(), self.fake_spec)
def revert_allocation_for_migration(source_cn, instance, migration,
                                    orig_alloc):
    """Revert an allocation made for a migration back to the instance."""
    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    # FIXME(danms): Since we don't have an atomic operation to adjust
    # allocations for multiple consumers, we have to have space on the
    # source for double the claim before we delete the old one
    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.put_allocations(source_cn.uuid,
                                           instance.uuid,
                                           orig_alloc,
                                           instance.project_id,
                                           instance.user_id)
    if not success:
        LOG.error('Unable to replace resource claim on source '
                  'host %(host)s node %(node)s for instance',
                  {'host': instance.host,
                   'node': instance.node},
                  instance=instance)
    else:
        LOG.debug('Created allocations for instance %(inst)s on %(rp)s',
                  {'inst': instance.uuid,
                   'rp': source_cn.uuid})

    reportclient.delete_allocation_for_instance(migration.uuid)

    # TODO(danms): Remove this late retry logic when we can replace
    # the above two-step process with a single atomic one. Until then,
    # we just re-attempt the claim for the instance now that we have
    # cleared what should be an equal amount of space by deleting the
    # holding migration.
    if not success:
        # NOTE(danms): We failed to claim the resources for the
        # instance above before the delete of the migration's
        # claim. Try again to claim for the instance. This is just
        # a racy attempt to be atomic and avoid stranding this
        # instance without an allocation. When we have an atomic
        # replace operation we should remove this.
        success = reportclient.put_allocations(source_cn.uuid,
                                               instance.uuid,
                                               orig_alloc,
                                               instance.project_id,
                                               instance.user_id)
        if success:
            LOG.debug('Created allocations for instance %(inst)s on '
                      '%(rp)s (retried)',
                      {'inst': instance.uuid,
                       'rp': source_cn.uuid})
        else:
            LOG.error('Unable to replace resource claim on source '
                      'host %(host)s node %(node)s for instance (retried)',
                      {'host': instance.host,
                       'node': instance.node},
                      instance=instance)
def replace_allocation_with_migration(context, instance, migration):
    """Replace instance's allocation with one for a migration.

    :returns: (source_compute_node, migration_allocation)
    """
    try:
        source_cn = objects.ComputeNode.get_by_host_and_nodename(
            context, instance.host, instance.node)
    except exception.ComputeHostNotFound:
        LOG.error('Unable to find record for source '
                  'node %(node)s on %(host)s',
                  {'host': instance.host, 'node': instance.node},
                  instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    orig_alloc = reportclient.get_allocations_for_consumer_by_provider(
        source_cn.uuid, instance.uuid)
    if not orig_alloc:
        LOG.error('Unable to find existing allocations for instance',
                  instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise exception.InstanceUnacceptable(
            instance_id=instance.uuid,
            reason=_('Instance has no source node allocation'))

    # FIXME(danms): Since we don't have an atomic operation to adjust
    # allocations for multiple consumers, we have to have space on the
    # source for double the claim before we delete the old one
    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.put_allocations(source_cn.uuid,
                                           migration.uuid,
                                           orig_alloc,
                                           instance.project_id,
                                           instance.user_id)
    if not success:
        LOG.error('Unable to replace resource claim on source '
                  'host %(host)s node %(node)s for instance',
                  {'host': instance.host,
                   'node': instance.node},
                  instance=instance)
        # Mimic the "no space" error that could have come from the
        # scheduler. Once we have an atomic replace operation, this
        # would be a severe error.
        raise exception.NoValidHost(
            reason=_('Unable to replace instance claim on source'))
    else:
        LOG.debug('Created allocations for migration %(mig)s on %(rp)s',
                  {'mig': migration.uuid, 'rp': source_cn.uuid})

    reportclient.delete_allocation_for_instance(instance.uuid)

    return source_cn, orig_alloc
def _generate_task(self):
    return migrate.MigrationTask(self.context, self.instance, self.flavor,
                                 self.filter_properties, self.request_spec,
                                 self.reservations, self.clean_shutdown,
                                 compute_rpcapi.ComputeAPI(),
                                 scheduler_client.SchedulerClient())
def allocation_sync(cxt):
    hm = host_manager.HostManager()
    states = hm.get_all_host_states(cxt)
    compute_api = compute.API()
    node_vm_map = {}
    reportclient = scheduler_client.SchedulerClient().reportclient
    now = datetime.now()
    for state in states:
        rp_uuid = state.uuid
        rp = rp_obj.ResourceProvider.get_by_uuid(cxt, rp_uuid)
        # NOTE(fanzhang): Build a mapping of node name to the set of
        # instance uuids reported on that node.
        node_vm_map.setdefault(rp.name, set())
        for instance_uuid in state.instances:
            instance_obj = state.instances[instance_uuid]
            node_name = instance_obj.node
            node_vm_map.setdefault(node_name, set())
            node_vm_map[node_name].add(instance_uuid)
            LOG.debug("Instance uuid is %s", instance_uuid)
        vms_in_node = node_vm_map[rp.name]
        allocations_list = rp_obj.AllocationList.\
            get_all_by_resource_provider(cxt, rp)
        LOG.debug('AllocationList is %s', allocations_list)
        vms_in_allocation = set(map(lambda x: x.consumer_id,
                                    allocations_list))
        if vms_in_node != vms_in_allocation:
            LOG.warning('Instances on node %s do not match allocations %s',
                        vms_in_node, vms_in_allocation)
        # NOTE(fanzhang): Delete allocations whose consumers are not
        # present on the compute node.
        allocations_more = vms_in_allocation - vms_in_node
        if allocations_more:
            LOG.warning('Allocations with no matching instance on the '
                        'node: %s', allocations_more)
            for allocation in allocations_list:
                if allocation.consumer_id in allocations_more:
                    allocs = rp_obj.AllocationList.get_all_by_consumer_id(
                        cxt, consumer_id=allocation.consumer_id)
                    created_at = allocation.created_at.replace(tzinfo=None)
                    # Use total_seconds() so allocations older than a day
                    # are not treated as recent.
                    delta = (now - created_at).total_seconds()
                    if delta >= 1800:
                        LOG.info('Trying to delete %s', allocation)
                        LOG.debug('Allocations by consumer id are %s',
                                  allocs)
                        # log_redo_sql(allocs, allocation.id)
                        allocation.destroy()
                    else:
                        LOG.info('Allocation %s was created within the '
                                 'last 30 minutes; skipping', allocation)
        # NOTE(fanzhang): Report instances on compute nodes that have no
        # allocation records.
        host_manager_more = vms_in_node - vms_in_allocation
        if host_manager_more:
            LOG.warning('Instances on the node with no allocations: %s',
                        host_manager_more)
            for instance_uuid in host_manager_more:
                instance = compute_api.get(cxt, instance_uuid)
                LOG.debug(instance)
                LOG.warning('Should create an allocation record with '
                            'resource provider uuid %s and consumer id %s',
                            rp_uuid, instance.uuid)
def __init__(self):
    super(ComputeTaskManager, self).__init__()
    self.compute_rpcapi = compute_rpcapi.ComputeAPI()
    self.image_api = image.API()
    self.scheduler_client = scheduler_client.SchedulerClient()
    self.notifier = rpc.get_notifier('compute', CONF.host)
def replace_allocation_with_migration(context, instance, migration):
    """Replace instance's allocation with one for a migration.

    :raises: keystoneauth1.exceptions.base.ClientException on failure to
             communicate with the placement API
    :raises: ConsumerAllocationRetrievalFailed if reading the current
             allocation from placement fails
    :raises: ComputeHostNotFound if the host of the instance is not found in
             the database
    :raises: AllocationMoveFailed if moving the allocation from the
             instance.uuid to the migration.uuid fails due to a parallel
             placement operation on the instance consumer
    :raises: NoValidHost if placement rejects the update for other reasons
             (e.g. not enough resources)
    :returns: (source_compute_node, migration_allocation)
    """
    try:
        source_cn = objects.ComputeNode.get_by_host_and_nodename(
            context, instance.host, instance.node)
    except exception.ComputeHostNotFound:
        LOG.error('Unable to find record for source '
                  'node %(node)s on %(host)s',
                  {'host': instance.host, 'node': instance.node},
                  instance=instance)
        # A generic error like this will just error out the migration
        # and do any rollback required
        raise

    schedclient = scheduler_client.SchedulerClient()
    reportclient = schedclient.reportclient

    orig_alloc = reportclient.get_allocs_for_consumer(
        context, instance.uuid)['allocations']
    root_alloc = orig_alloc.get(source_cn.uuid, {}).get('resources', {})
    if not root_alloc:
        LOG.debug('Unable to find existing allocations for instance on '
                  'source compute node: %s. This is normal if you are not '
                  'using the FilterScheduler.', source_cn.uuid,
                  instance=instance)
        return None, None

    # FIXME(danms): This method is flawed in that it assumes allocations
    # against only one provider. So, this may overwrite allocations against
    # a shared provider, if we had one.
    success = reportclient.move_allocations(context, instance.uuid,
                                            migration.uuid)
    if not success:
        LOG.error('Unable to replace resource claim on source '
                  'host %(host)s node %(node)s for instance',
                  {'host': instance.host,
                   'node': instance.node},
                  instance=instance)
        # Mimic the "no space" error that could have come from the
        # scheduler. Once we have an atomic replace operation, this
        # would be a severe error.
        raise exception.NoValidHost(
            reason=_('Unable to replace instance claim on source'))
    else:
        LOG.debug('Created allocations for migration %(mig)s on %(rp)s',
                  {'mig': migration.uuid, 'rp': source_cn.uuid})

    return source_cn, orig_alloc
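A minimal usage sketch, not taken from the source, showing how this function might be paired with the revert_allocation_for_migration variant shown earlier: move the instance's allocation onto the migration record, do the migration work, and move it back on failure. The context/instance/migration objects are assumed from the snippets above, and do_migration() is a hypothetical placeholder.

# Hypothetical sketch: hold the source claim under migration.uuid while the
# migration runs, and give it back to instance.uuid if anything fails.
source_cn, orig_alloc = replace_allocation_with_migration(
    context, instance, migration)
try:
    do_migration()  # placeholder for the actual migration work
except Exception:
    if source_cn is not None:
        # Move the allocation held by migration.uuid back to instance.uuid.
        revert_allocation_for_migration(context, source_cn, instance,
                                        migration)
    raise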
from oslo_log import log as logging
from oslo_utils import importutils

import nova.conf
from nova import config
from nova import context
from nova import objects
from nova.scheduler import client as scheduler_client
from nova.scheduler import utils

CONF = nova.conf.CONF
logging.setup(CONF, 'nova')
LOG = logging.getLogger(__name__)

argv = []
default_config_files = ['/etc/nova/nova.conf']
config.parse_args(argv, default_config_files=default_config_files)
objects.register_all()

context = context.get_admin_context()

client = scheduler_client.SchedulerClient()
placement_client = client.reportclient

instance_uuid = 'fdc43c5c-49e1-448b-8cb1-c0d73030697f'
request_spec = objects.RequestSpec.get_by_instance_uuid(context,
                                                        instance_uuid)
resources = utils.resources_from_request_spec(request_spec)
res = placement_client.get_allocation_candidates(resources)
alloc_reqs, provider_summaries = res
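A small hedged follow-up to the script above: simply logging what get_allocation_candidates returned. The exact shape of the returned data varies by Nova/placement version (some releases also return an allocation request version as a third element), so this only iterates and prints.

# Sketch only: dump the allocation candidates fetched above.
for alloc_req in alloc_reqs or []:
    LOG.info('Allocation request: %s', alloc_req)
for rp_uuid, summary in (provider_summaries or {}).items():
    LOG.info('Provider %s summary: %s', rp_uuid, summary)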