def _evacuate_with_failure(self, server, compute1):
    # Perform an evacuation during which we experience a failure on the
    # destination host
    instance_uuid = server['id']
    with mock.patch.object(compute1.driver, 'plug_vifs') as plug_vifs:
        plug_vifs.side_effect = test.TestingException
        self.api.post_server_action(
            instance_uuid, {'evacuate': {'host': 'compute1'}})

        # Wait for the rebuild to start, then complete
        fake_notifier.wait_for_versioned_notifications(
            'instance.rebuild.start')
        self._wait_for_migration_status(server, ['failed'])
        server = self._wait_for_server_parameter(
            server, {'OS-EXT-STS:task_state': None})

        # Meta-test
        plug_vifs.assert_called()
        plug_vifs.reset_mock()

    # Return fresh server state after evacuate
    return server

def _rebuild_server(self, server, image_uuid, expected_state='ACTIVE'):
    """Rebuild a server."""
    self.api.post_server_action(
        server['id'], {'rebuild': {'imageRef': image_uuid}},
    )
    fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
    return self._wait_for_state_change(server, expected_state)

def _reboot_server(self, server, hard=False, expected_state='ACTIVE'):
    """Reboot a server."""
    self.api.post_server_action(
        server['id'], {'reboot': {'type': 'HARD' if hard else 'SOFT'}},
    )
    fake_notifier.wait_for_versioned_notifications('instance.reboot.end')
    return self._wait_for_state_change(server, expected_state)

def test_cache_image(self):
    """Test caching images by injecting the request directly to the
    conductor service and making sure it fans out and calls the expected
    nodes.
    """
    aggregate = objects.Aggregate(name='test', uuid=uuids.aggregate,
                                  id=1,
                                  hosts=['compute1', 'compute3',
                                         'compute4', 'compute5'])
    self.conductor.compute_task_mgr.cache_images(
        self.context, aggregate, ['an-image'])

    # NOTE(danms): We expect only three image cache attempts because
    # compute5 is marked as forced-down and compute2 is not in the
    # requested aggregate.
    for host in ['compute1', 'compute3', 'compute4']:
        mgr = getattr(self, host)
        self.assertEqual(set(['an-image']), mgr.driver.cached_images)
    for host in ['compute2', 'compute5']:
        mgr = getattr(self, host)
        self.assertEqual(set(), mgr.driver.cached_images)

    fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.start')
    fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.end')

def _attach_port(self, instance_uuid, port_id):
    self.api.attach_interface(
        instance_uuid,
        {'interfaceAttachment': {'port_id': port_id}})
    fake_notifier.wait_for_versioned_notifications(
        'instance.interface_attach.end')

def _delete_and_check_allocations(self, server):
    """Delete the instance and assert that the allocations are cleaned.

    :param server: The API representation of the instance to be deleted
    """
    self.api.delete_server(server['id'])
    self._wait_until_deleted(server)
    # NOTE(gibi): The resource allocation is deleted after the instance
    # is destroyed in the db so wait_until_deleted might return before
    # the resources are deleted in placement. So we need to wait for the
    # instance.delete.end notification as that is emitted after the
    # resources are freed.
    fake_notifier.wait_for_versioned_notifications('instance.delete.end')

    for rp_uuid in [self._get_provider_uuid_by_host(hostname)
                    for hostname in self.computes.keys()]:
        self.assertRequestMatchesUsage({'VCPU': 0,
                                        'MEMORY_MB': 0,
                                        'DISK_GB': 0}, rp_uuid)

    # and no allocations for the deleted server
    allocations = self._get_allocations_by_server_uuid(server['id'])
    self.assertEqual(0, len(allocations))

def _resize_server(self, server, flavor_id):
    self.api.post_server_action(
        server['id'], {'resize': {'flavorRef': flavor_id}})
    fake_notifier.wait_for_versioned_notifications('instance.resize.end')
    return self._wait_for_state_change(server, 'VERIFY_RESIZE')

def _attach_volume_to_server(self, server_id, volume_id):
    """Attaches the volume to the server and waits for the
    "instance.volume_attach.end" versioned notification.
    """
    body = {'volumeAttachment': {'volumeId': volume_id}}
    self.api.api_post(
        '/servers/%s/os-volume_attachments' % server_id, body)
    fake_notifier.wait_for_versioned_notifications(
        'instance.volume_attach.end')

def test_restart_compute_while_instance_waiting_for_resource_claim(self):
    """Test for bug 1833581 where an instance is stuck in BUILD state
    forever because the compute service is restarted before the resource
    claim is finished.
    """
    # To reproduce the problem we need to stop / kill the compute service
    # when an instance build request has already reached the service but
    # the instance_claim() has not finished. One way that this
    # happens in practice is when multiple builds are waiting for the
    # 'nova-compute-resource' semaphore. So one way to reproduce this in
    # the test would be to grab that semaphore, boot an instance, wait
    # for it to reach the compute then stop the compute.
    # Unfortunately when we release the semaphore after the simulated
    # compute restart the original instance_claim execution continues as
    # the stopped compute is not 100% stopped in the func test env. Also
    # we cannot really keep the semaphore forever as this named semaphore
    # is shared between the old and new compute service.
    # There is another way to trigger the issue. We can inject a sleep
    # into instance_claim() to stop it. This is less realistic but it
    # works in the test env.
    server_req = self._build_minimal_create_server_request(
        self.api, 'interrupted-server', flavor_id=self.flavor1['id'],
        image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
        networks='none')

    def sleep_forever(*args, **kwargs):
        time.sleep(1000000)

    with mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                    'instance_claim') as mock_instance_claim:
        mock_instance_claim.side_effect = sleep_forever

        server = self.api.post_server({'server': server_req})
        self._wait_for_state_change(self.admin_api, server, 'BUILD')

        # the instance.create.start is the closest thing to the
        # instance_claim call we can wait for in the test
        fake_notifier.wait_for_versioned_notifications(
            'instance.create.start')
        self.restart_compute_service(self.compute1)

    # This is bug 1833581 as the server remains in BUILD state after the
    # compute restart.
    self._wait_for_state_change(self.admin_api, server, 'BUILD')

    # Not even the periodic task pushes this server to ERROR because the
    # server host is still None since the instance_claim didn't set it.
    self.flags(instance_build_timeout=1)
    self.compute1.manager._check_instance_build_time(
        nova_context.get_admin_context())
    server = self.admin_api.get_server(server['id'])
    self.assertEqual('BUILD', server['status'])
    self.assertIsNone(server['OS-EXT-SRV-ATTR:host'])

def test_migrate_reschedule_blocked_az_up_call(self):
    # We need to stub out the call to get_host_availability_zone to blow
    # up once we have gone to the compute service.
    original_prep_resize = compute_manager.ComputeManager._prep_resize
    self.rescheduled = None

    def wrap_prep_resize(_self, *args, **kwargs):
        # Poison the AZ query to blow up as if the cell conductor does
        # not have access to the API DB.
        self.agg_mock = self.useFixture(
            fixtures.MockPatch(
                'nova.objects.AggregateList.get_by_host',
                side_effect=oslo_db_exc.CantStartEngineError)).mock
        if self.rescheduled is None:
            # Track the first host that we rescheduled from.
            self.rescheduled = _self.host
            # Trigger a reschedule.
            raise exception.ComputeResourcesUnavailable(
                reason='test_migrate_reschedule_blocked_az_up_call')
        return original_prep_resize(_self, *args, **kwargs)

    self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                  wrap_prep_resize)
    server = self._build_minimal_create_server_request(
        self.api, 'test_migrate_reschedule_blocked_az_up_call')
    server = self.api.post_server({'server': server})
    server = self._wait_for_state_change(self.api, server, 'ACTIVE')
    original_host = server['OS-EXT-SRV-ATTR:host']

    # Now cold migrate the server to the other host.
    self.api.post_server_action(server['id'], {'migrate': None})
    # FIXME(mriedem): This is bug 1781286 where we reschedule from the
    # first selected host to conductor which will try to get the AZ for
    # the alternate host selection which will fail since it cannot
    # access the API DB.
    fake_notifier.wait_for_versioned_notifications(
        'compute_task.migrate_server.error')
    server = self._wait_for_server_parameter(
        self.api, server,
        {'status': 'ERROR',
         'OS-EXT-SRV-ATTR:host': original_host,
         'OS-EXT-STS:task_state': None})
    # Assert there is a fault injected on the server. This is a bit
    # annoying in that we would expect to see CantStartEngineError but,
    # because of how ComputeManager._reschedule_resize_or_reraise works,
    # the reschedule call to conductor is an RPC call so that exception
    # comes back to compute which injects a fault but then re-raises the
    # ComputeResourcesUnavailable exception which gets recorded as the
    # most recent fault which is what shows up in the API. So instead we
    # assert that the reschedule happened and assert the mocked method
    # was called.
    self.assertIn('Insufficient compute resources',
                  server['fault']['message'])
    self.assertIsNotNone(self.rescheduled)
    self.agg_mock.assert_called_once()

def _delete_and_check_allocations(self, server):
    """Delete the instance and assert that the allocations are cleaned.

    If the server was moved (resized or live migrated), this also checks
    that migration-based allocations are cleaned up.

    :param server: The API representation of the instance to be deleted
    :returns: The uuid of the migration record associated with the
        resize or cold migrate operation
    """
    # First check to see if there is a related migration record so we
    # can assert its allocations (if any) are not leaked.
    with utils.temporary_mutation(self.admin_api, microversion='2.59'):
        migrations = self.admin_api.api_get(
            '/os-migrations?instance_uuid=%s' %
            server['id']).body['migrations']
    if migrations:
        # If there is more than one migration, they are sorted by
        # created_at in descending order so we'll get the last one
        # which is probably what we'd always want anyway.
        migration_uuid = migrations[0]['uuid']
    else:
        migration_uuid = None

    self._delete_server(server)
    # NOTE(gibi): The resource allocation is deleted after the instance
    # is destroyed in the db so wait_until_deleted might return before
    # the resources are deleted in placement. So we need to wait for the
    # instance.delete.end notification as that is emitted after the
    # resources are freed.
    fake_notifier.wait_for_versioned_notifications('instance.delete.end')

    for rp_uuid in [self._get_provider_uuid_by_host(hostname)
                    for hostname in self.computes.keys()]:
        self.assertRequestMatchesUsage({'VCPU': 0,
                                        'MEMORY_MB': 0,
                                        'DISK_GB': 0}, rp_uuid)

    # and no allocations for the deleted server
    allocations = self._get_allocations_by_server_uuid(server['id'])
    self.assertEqual(0, len(allocations))

    if migration_uuid:
        # and no allocations for the deleted migration
        allocations = self._get_allocations_by_server_uuid(migration_uuid)
        self.assertEqual(0, len(allocations))

    return migration_uuid

def _attach_interface(self, server, port_uuid):
    """Attach a neutron port to a server."""
    body = {
        "interfaceAttachment": {
            "port_id": port_uuid
        }
    }
    attachment = self.api.attach_interface(server['id'], body)
    fake_notifier.wait_for_versioned_notifications(
        'instance.interface_attach.end')
    return attachment

def _revert_resize(self, server):
    self.api.post_server_action(server['id'], {'revertResize': None})
    server = self._wait_for_state_change(server, 'ACTIVE')
    self._wait_for_migration_status(server, ['reverted'])
    # Note that the migration status is changed to "reverted" in the
    # dest host revert_resize method but the allocations are cleaned up
    # in the source host finish_revert_resize method so we need to wait
    # for the finish_revert_resize method to complete.
    fake_notifier.wait_for_versioned_notifications(
        'instance.resize_revert.end')
    return server

def test_cache_image(self):
    """Test caching images by injecting the request directly to the
    conductor service and making sure it fans out and calls the expected
    nodes.
    """
    aggregate = objects.Aggregate(
        name='test', uuid=uuids.aggregate, id=1,
        hosts=['compute1', 'compute3', 'compute4', 'compute5'])
    self.conductor.compute_task_mgr.cache_images(
        self.context, aggregate, ['an-image'])

    # NOTE(danms): We expect only three image cache attempts because
    # compute5 is marked as forced-down and compute2 is not in the
    # requested aggregate.
    for host in ['compute1', 'compute3', 'compute4']:
        mgr = getattr(self, host)
        self.assertEqual(set(['an-image']), mgr.driver.cached_images)
    for host in ['compute2', 'compute5']:
        mgr = getattr(self, host)
        self.assertEqual(set(), mgr.driver.cached_images)

    fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.start')
    progress = fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.progress', n_events=4)
    self.assertEqual(4, len(progress), progress)
    for notification in progress:
        payload = notification['payload']['nova_object.data']
        if payload['host'] == 'compute5':
            self.assertEqual(['an-image'], payload['images_failed'])
            self.assertEqual([], payload['images_cached'])
        else:
            self.assertEqual(['an-image'], payload['images_cached'])
            self.assertEqual([], payload['images_failed'])
        self.assertLessEqual(payload['index'], 4)
        self.assertGreater(payload['index'], 0)
        self.assertEqual(4, payload['total'])
        self.assertIn('conductor', notification['publisher_id'])
    fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.end')

    logtext = self.stdlog.logger.output
    self.assertIn(
        '3 cached, 0 existing, 0 errors, 0 unsupported, 1 skipped',
        logtext)
    self.assertNotIn(
        'Image pre-cache operation for image an-image failed', logtext)

def test_restart_compute_while_instance_waiting_for_resource_claim(self):
    """Test for bug 1833581 where an instance is stuck in BUILD state
    forever because the compute service is restarted before the resource
    claim is finished.
    """
    # To reproduce the problem we need to stop / kill the compute service
    # when an instance build request has already reached the service but
    # the instance_claim() has not finished. One way that this
    # happens in practice is when multiple builds are waiting for the
    # 'nova-compute-resource' semaphore. So one way to reproduce this in
    # the test would be to grab that semaphore, boot an instance, wait
    # for it to reach the compute then stop the compute.
    # Unfortunately when we release the semaphore after the simulated
    # compute restart the original instance_claim execution continues as
    # the stopped compute is not 100% stopped in the func test env. Also
    # we cannot really keep the semaphore forever as this named semaphore
    # is shared between the old and new compute service.
    # There is another way to trigger the issue. We can inject a sleep
    # into instance_claim() to stop it. This is less realistic but it
    # works in the test env.
    server_req = self._build_minimal_create_server_request(
        'interrupted-server', flavor_id=self.flavor1['id'],
        image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
        networks='none')

    def sleep_forever(*args, **kwargs):
        time.sleep(1000000)

    with mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                    'instance_claim') as mock_instance_claim:
        mock_instance_claim.side_effect = sleep_forever

        server = self.api.post_server({'server': server_req})
        self._wait_for_state_change(server, 'BUILD')

        # the instance.create.start is the closest thing to the
        # instance_claim call we can wait for in the test
        fake_notifier.wait_for_versioned_notifications(
            'instance.create.start')

        with mock.patch('nova.compute.manager.LOG.debug') as mock_log:
            self.restart_compute_service(self.compute1)

    # We expect that the instance is pushed to ERROR state during the
    # compute restart.
    self._wait_for_state_change(server, 'ERROR')
    mock_log.assert_called_with(
        'Instance spawn was interrupted before instance_claim, setting '
        'instance to ERROR state',
        instance=mock.ANY)

def test_server_power_update(self):
    # This test checks the functionality of handling the "power-update"
    # external events.
    self.assertEqual(power_state.RUNNING,
                     self.server['OS-EXT-STS:power_state'])
    self.api.create_server_external_events(events=[self.power_off])
    expected_params = {'OS-EXT-STS:task_state': None,
                       'OS-EXT-STS:vm_state': vm_states.STOPPED,
                       'OS-EXT-STS:power_state': power_state.SHUTDOWN}
    server = self._wait_for_server_parameter(self.server, expected_params)
    msg = ' with target power state POWER_OFF.'
    self.assertIn(msg, self.stdlog.logger.output)

    # Test if this is logged in the instance action list.
    actions = self.api.get_instance_actions(server['id'])
    self.assertEqual(2, len(actions))
    acts = {action['action']: action for action in actions}
    self.assertEqual(['create', 'stop'], sorted(acts))
    stop_action = acts[instance_actions.STOP]
    detail = self.api.api_get(
        '/servers/%s/os-instance-actions/%s' % (
            server['id'],
            stop_action['request_id'])).body['instanceAction']
    events_by_name = {event['event']: event for event in detail['events']}
    self.assertEqual(1, len(detail['events']), detail)
    self.assertIn('compute_power_update', events_by_name)
    self.assertEqual('Success', detail['events'][0]['result'])

    # Test if notifications were emitted.
    fake_notifier.wait_for_versioned_notifications(
        'instance.power_off.start')
    fake_notifier.wait_for_versioned_notifications(
        'instance.power_off.end')

    # Checking POWER_ON
    self.api.create_server_external_events(events=[self.power_on])
    expected_params = {'OS-EXT-STS:task_state': None,
                       'OS-EXT-STS:vm_state': vm_states.ACTIVE,
                       'OS-EXT-STS:power_state': power_state.RUNNING}
    server = self._wait_for_server_parameter(self.server, expected_params)
    msg = ' with target power state POWER_ON.'
    self.assertIn(msg, self.stdlog.logger.output)

    # Test if this is logged in the instance action list.
    actions = self.api.get_instance_actions(server['id'])
    self.assertEqual(3, len(actions))
    acts = {action['action']: action for action in actions}
    self.assertEqual(['create', 'start', 'stop'], sorted(acts))
    start_action = acts[instance_actions.START]
    detail = self.api.api_get(
        '/servers/%s/os-instance-actions/%s' % (
            server['id'],
            start_action['request_id'])).body['instanceAction']
    events_by_name = {event['event']: event for event in detail['events']}
    self.assertEqual(1, len(detail['events']), detail)
    self.assertIn('compute_power_update', events_by_name)
    self.assertEqual('Success', detail['events'][0]['result'])

    # Test if notifications were emitted.
    fake_notifier.wait_for_versioned_notifications(
        'instance.power_on.start')
    fake_notifier.wait_for_versioned_notifications(
        'instance.power_on.end')

def fake_rebuild(self_, context, instance, *args, **kwargs):
    # Simulate that the rebuild request of one of the instances
    # reaches the target compute manager significantly later so the
    # rebuild of the other instance can finish before the late
    # validation of the first rebuild.
    # We cannot simply delay the virt driver's rebuild or the
    # manager's _rebuild_default_impl as those run after the late
    # validation
    if instance.host == 'host1':
        # wait for the other instance rebuild to start
        fake_notifier.wait_for_versioned_notifications(
            'instance.rebuild.start', n_events=1)

    original_rebuild(self_, context, instance, *args, **kwargs)

def test_migrate_reschedule_blocked_az_up_call(self):
    self.flags(default_availability_zone='us-central')
    # We need to stub out the call to get_host_availability_zone to blow
    # up once we have gone to the compute service.
    original_prep_resize = compute_manager.ComputeManager._prep_resize
    self.rescheduled = None

    def wrap_prep_resize(_self, *args, **kwargs):
        # Poison the AZ query to blow up as if the cell conductor does
        # not have access to the API DB.
        self.agg_mock = self.useFixture(
            fixtures.MockPatch(
                'nova.objects.AggregateList.get_by_host',
                side_effect=oslo_db_exc.CantStartEngineError)).mock
        if self.rescheduled is None:
            # Track the first host that we rescheduled from.
            self.rescheduled = _self.host
            # Trigger a reschedule.
            raise exception.ComputeResourcesUnavailable(
                reason='test_migrate_reschedule_blocked_az_up_call')
        return original_prep_resize(_self, *args, **kwargs)

    self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                  wrap_prep_resize)
    server = self._build_minimal_create_server_request(
        self.api, 'test_migrate_reschedule_blocked_az_up_call')
    server = self.api.post_server({'server': server})
    server = self._wait_for_state_change(self.api, server, 'ACTIVE')
    original_host = server['OS-EXT-SRV-ATTR:host']

    # Now cold migrate the server to the other host.
    self.api.post_server_action(server['id'], {'migrate': None})
    # Because we poisoned AggregateList.get_by_host after hitting the
    # compute service we have to wait for the notification that the
    # resize is complete and then stop the mock so we can use the API
    # again.
    fake_notifier.wait_for_versioned_notifications(
        'instance.resize_finish.end')
    # Note that we use stopall here because we actually called
    # _prep_resize twice so we have more than one instance of the mock
    # that needs to be stopped.
    mock.patch.stopall()
    server = self._wait_for_state_change(self.api, server,
                                         'VERIFY_RESIZE')
    final_host = server['OS-EXT-SRV-ATTR:host']
    self.assertNotIn(final_host, [original_host, self.rescheduled])
    # We should have rescheduled and the instance AZ should be set from
    # the Selection object. Since neither compute host is in an AZ, the
    # server is in the default AZ from config.
    self.assertEqual('us-central', server['OS-EXT-AZ:availability_zone'])
    self.agg_mock.assert_not_called()

def test_wait_for_versioned_notifications(self):
    # Wait for a single notification which we emitted first
    self._generate_exception_notification()

    notifications = fake_notifier.wait_for_versioned_notifications(
        'compute.exception')
    self.assertEqual(1, len(notifications))

def test_aggregate_cache_images(self):
    aggregate_req = {
        "aggregate": {
            "name": "my-aggregate",
            "availability_zone": "nova"}}
    aggregate = self.admin_api.post_aggregate(aggregate_req)
    add_host_req = {
        "add_host": {
            "host": "compute"
        }
    }
    self.admin_api.post_aggregate_action(aggregate['id'], add_host_req)

    fake_notifier.reset()

    cache_images_req = {
        'cache': [
            {'id': '155d900f-4e14-4e4c-a73d-069cbf4541e6'}
        ]
    }
    self.admin_api.api_post(
        '/os-aggregates/%s/images' % aggregate['id'], cache_images_req)

    # Since the operation is asynchronous we have to wait for the end
    # notification.
    fake_notifier.wait_for_versioned_notifications(
        'aggregate.cache_images.end')

    self.assertEqual(3, len(fake_notifier.VERSIONED_NOTIFICATIONS),
                     fake_notifier.VERSIONED_NOTIFICATIONS)
    self._verify_notification(
        'aggregate-cache_images-start',
        replacements={
            'uuid': aggregate['uuid'],
            'id': aggregate['id']},
        actual=fake_notifier.VERSIONED_NOTIFICATIONS[0])
    self._verify_notification(
        'aggregate-cache_images-progress',
        replacements={
            'uuid': aggregate['uuid'],
            'id': aggregate['id']},
        actual=fake_notifier.VERSIONED_NOTIFICATIONS[1])
    self._verify_notification(
        'aggregate-cache_images-end',
        replacements={
            'uuid': aggregate['uuid'],
            'id': aggregate['id']},
        actual=fake_notifier.VERSIONED_NOTIFICATIONS[2])

def _delete_and_check_allocations(self, server):
    """Delete the instance and assert that the allocations are cleaned.

    If the server was moved (resized or live migrated), this also checks
    that migration-based allocations are cleaned up.

    :param server: The API representation of the instance to be deleted
    """
    # First check to see if there is a related migration record so we
    # can assert its allocations (if any) are not leaked.
    with utils.temporary_mutation(self.admin_api, microversion='2.59'):
        migrations = self.admin_api.api_get(
            '/os-migrations?instance_uuid=%s' %
            server['id']).body['migrations']
    if migrations:
        # If there is more than one migration, they are sorted by
        # created_at in descending order so we'll get the last one
        # which is probably what we'd always want anyway.
        migration_uuid = migrations[0]['uuid']
    else:
        migration_uuid = None

    self.api.delete_server(server['id'])
    self._wait_until_deleted(server)
    # NOTE(gibi): The resource allocation is deleted after the instance
    # is destroyed in the db so wait_until_deleted might return before
    # the resources are deleted in placement. So we need to wait for the
    # instance.delete.end notification as that is emitted after the
    # resources are freed.
    fake_notifier.wait_for_versioned_notifications('instance.delete.end')

    for rp_uuid in [self._get_provider_uuid_by_host(hostname)
                    for hostname in self.computes.keys()]:
        self.assertRequestMatchesUsage({'VCPU': 0,
                                        'MEMORY_MB': 0,
                                        'DISK_GB': 0}, rp_uuid)

    # and no allocations for the deleted server
    allocations = self._get_allocations_by_server_uuid(server['id'])
    self.assertEqual(0, len(allocations))

    if migration_uuid:
        # and no allocations for the deleted migration
        allocations = self._get_allocations_by_server_uuid(migration_uuid)
        self.assertEqual(0, len(allocations))

def test_wait_for_versioned_notifications_too_many(self):
    # Wait for a single notification when there are 2 in the queue
    self._generate_exception_notification()
    self._generate_exception_notification()

    notifications = fake_notifier.wait_for_versioned_notifications(
        'compute.exception')
    self.assertEqual(2, len(notifications))

def _assert_allocation_revert_on_fail(self, server):
    # Since this happens in MigrationTask.rollback in conductor, we need
    # to wait for something which happens after that, which is the
    # ComputeTaskManager._cold_migrate method sending the
    # compute_task.migrate_server.error event.
    fake_notifier.wait_for_versioned_notifications(
        'compute_task.migrate_server.error')
    mig_uuid = self.get_migration_uuid_for_instance(server['id'])
    mig_allocs = self._get_allocations_by_server_uuid(mig_uuid)
    self.assertEqual({}, mig_allocs)
    source_rp_uuid = self._get_provider_uuid_by_host(
        server['OS-EXT-SRV-ATTR:host'])
    server_allocs = self._get_allocations_by_server_uuid(server['id'])
    volume_backed = False if server['image'] else True
    self.assertFlavorMatchesAllocation(
        server['flavor'], server_allocs[source_rp_uuid]['resources'],
        volume_backed=volume_backed)

def test_unshelve_offloaded_fails_due_to_neutron(self):
    server = self._create_server(
        networks=[{'port': self.neutron.port_1['id']}],
        az='nova:host1')

    # with default config shelve means immediate offload as well
    req = {'shelve': {}}
    self.api.post_server_action(server['id'], req)
    self._wait_for_server_parameter(
        server,
        {'status': 'SHELVED_OFFLOADED', 'OS-EXT-SRV-ATTR:host': None})
    allocations = self.placement_api.get(
        '/allocations/%s' % server['id']).body['allocations']
    self.assertEqual(0, len(allocations))

    # disable the original host of the instance to force a port update
    # during unshelve
    source_service_id = self.api.get_services(
        host='host1', binary='nova-compute')[0]['id']
    self.api.put_service(source_service_id, {"status": "disabled"})

    # Simulate that the port update fails during unshelve because
    # neutron is unavailable
    with mock.patch('nova.tests.fixtures.NeutronFixture.'
                    'update_port') as mock_update_port:
        mock_update_port.side_effect = neutron_exception.ConnectionFailed(
            reason='test')
        req = {'unshelve': None}
        self.api.post_server_action(server['id'], req)
        fake_notifier.wait_for_versioned_notifications(
            'instance.unshelve.start')
        self._wait_for_server_parameter(
            server,
            {'status': 'SHELVED_OFFLOADED',
             'OS-EXT-STS:task_state': None,
             'OS-EXT-SRV-ATTR:host': None})

    # As the instance went back to offloaded state we expect no
    # allocation
    allocations = self.placement_api.get(
        '/allocations/%s' % server['id']).body['allocations']
    self.assertEqual(0, len(allocations))

def test_rebuild_with_keypair(self):
    keypair_req = {
        'keypair': {
            'name': 'test-key1',
            'type': 'ssh',
        },
    }
    keypair1 = self.api.post_keypair(keypair_req)
    keypair_req['keypair']['name'] = 'test-key2'
    keypair2 = self.api.post_keypair(keypair_req)

    server = self._build_server(networks='none')
    server.update({'key_name': 'test-key1'})

    # Create a server with keypair 'test-key1'
    server = self.api.post_server({'server': server})
    self._wait_for_state_change(server, 'ACTIVE')

    # Check keypairs
    ctxt = context.get_admin_context()
    instance = objects.Instance.get_by_uuid(
        ctxt, server['id'], expected_attrs=['keypairs'])
    self.assertEqual(
        keypair1['public_key'], instance.keypairs[0].public_key)

    # Rebuild a server with keypair 'test-key2'
    body = {
        'rebuild': {
            'imageRef': self.glance.auto_disk_config_enabled_image['id'],
            'key_name': 'test-key2',
        },
    }
    self.api.api_post('servers/%s/action' % server['id'], body)
    fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
    self._wait_for_state_change(server, 'ACTIVE')

    # Check keypairs changed
    instance = objects.Instance.get_by_uuid(
        ctxt, server['id'], expected_attrs=['keypairs'])
    self.assertEqual(
        keypair2['public_key'], instance.keypairs[0].public_key)

def _wait_for_notifications(self, event_type, expected_count,
                            timeout=1.0):
    notifications = fake_notifier.wait_for_versioned_notifications(
        event_type, n_events=expected_count, timeout=timeout)
    self.assertEqual(
        expected_count, len(notifications),
        'Unexpected number of %s notifications '
        'within the given timeout. '
        'Expected %d, got %d: %s' %
        (event_type, expected_count, len(notifications), notifications))
    return notifications

def test_server_create_reschedule_blocked_az_up_call(self):
    # We need to stub out the call to get_host_availability_zone to blow
    # up once we have gone to the compute service. With the way our
    # RPC/DB fixtures are set up it's non-trivial to try and separate a
    # superconductor from a cell conductor so we could configure the
    # cell conductor to not have access to the API DB, but that would be
    # a nice thing to have at some point.
    original_bari = compute_manager.ComputeManager.build_and_run_instance

    def wrap_bari(*args, **kwargs):
        # Poison the AZ query to blow up as if the cell conductor does
        # not have access to the API DB.
        self.useFixture(
            fixtures.MockPatch(
                'nova.objects.AggregateList.get_by_host',
                side_effect=oslo_db_exc.CantStartEngineError))
        return original_bari(*args, **kwargs)

    self.stub_out('nova.compute.manager.ComputeManager.'
                  'build_and_run_instance', wrap_bari)
    server = self._build_minimal_create_server_request(
        self.api, 'test_server_create_reschedule_blocked_az_up_call')
    server = self.api.post_server({'server': server})
    # FIXME(mriedem): This is bug 1781286 where we reschedule from the
    # first failed host to conductor which will try to get the AZ for
    # the alternate host selection which will fail since it cannot
    # access the API DB.
    # Note that we have to wait for the notification before calling the
    # API to avoid a race where instance.host is not None and the API
    # tries to hit the AggregateList.get_by_host method we mocked to
    # fail.
    fake_notifier.wait_for_versioned_notifications(
        'compute_task.build_instances.error')
    server = self._wait_for_server_parameter(
        self.api, server,
        {'status': 'ERROR',
         'OS-EXT-SRV-ATTR:host': None,
         'OS-EXT-STS:task_state': None})
    # Assert there is a fault injected on the server for the error we
    # expect.
    self.assertIn('CantStartEngineError', server['fault']['message'])

def _wait_for_notifications(self, event_type, expected_count,
                            timeout=10.0):
    notifications = fake_notifier.wait_for_versioned_notifications(
        event_type, n_events=expected_count, timeout=timeout)
    msg = ''.join('\n%s' % notif for notif in notifications)
    self.assertEqual(
        expected_count, len(notifications),
        'Unexpected number of %s notifications '
        'within the given timeout. '
        'Expected %d, got %d: %s' %
        (event_type, expected_count, len(notifications), msg))
    return notifications

def _evacuate_with_failure(self, server, compute1):
    # Perform an evacuation during which we experience a failure on the
    # destination host
    with mock.patch.object(compute1.driver, 'plug_vifs') as plug_vifs:
        plug_vifs.side_effect = test.TestingException
        server = self._evacuate_server(
            server, {'host': 'compute1'}, expected_state='ERROR',
            expected_task_state=None, expected_migration_status='failed')

        # Wait for the rebuild to start, then complete
        fake_notifier.wait_for_versioned_notifications(
            'instance.rebuild.start')

        # Meta-test
        plug_vifs.assert_called()
        plug_vifs.reset_mock()

    # Return fresh server state after evacuate
    return server

def test_server_create_reschedule_blocked_az_up_call(self):
    self.flags(default_availability_zone='us-central')
    # We need to stub out the call to get_host_availability_zone to blow
    # up once we have gone to the compute service. With the way our
    # RPC/DB fixtures are set up it's non-trivial to try and separate a
    # superconductor from a cell conductor so we could configure the
    # cell conductor to not have access to the API DB, but that would be
    # a nice thing to have at some point.
    original_bari = compute_manager.ComputeManager.build_and_run_instance

    def wrap_bari(*args, **kwargs):
        # Poison the AZ query to blow up as if the cell conductor does
        # not have access to the API DB.
        self.useFixture(
            fixtures.MockPatch(
                'nova.objects.AggregateList.get_by_host',
                side_effect=oslo_db_exc.CantStartEngineError))
        return original_bari(*args, **kwargs)

    self.stub_out('nova.compute.manager.ComputeManager.'
                  'build_and_run_instance', wrap_bari)
    server = self._build_minimal_create_server_request(
        'test_server_create_reschedule_blocked_az_up_call')
    server = self.api.post_server({'server': server})
    # Because we poisoned AggregateList.get_by_host after hitting the
    # compute service we have to wait for the notification that the
    # build is complete and then stop the mock so we can use the API
    # again.
    fake_notifier.wait_for_versioned_notifications('instance.create.end')
    # Note that we use stopall here because we actually called
    # build_and_run_instance twice so we have more than one instance of
    # the mock that needs to be stopped.
    mock.patch.stopall()
    server = self._wait_for_state_change(server, 'ACTIVE')
    # We should have rescheduled and the instance AZ should be set from
    # the Selection object. Since neither compute host is in an AZ, the
    # server is in the default AZ from config.
    self.assertEqual('us-central', server['OS-EXT-AZ:availability_zone'])

def _wait_for_notification(self, event_type, timeout=10.0):
    # NOTE(mdbooth): wait_for_versioned_notifications raises an exception
    # if it times out since change I017d1a31. Consider removing this
    # method.
    fake_notifier.wait_for_versioned_notifications(
        event_type, timeout=timeout)

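# For reference, a minimal sketch of how a wait helper with the semantics
# relied on above (poll for matching events, honor n_events, raise on
# timeout per the NOTE(mdbooth) comment) could be implemented. This is an
# illustrative assumption, not Nova's actual fake_notifier code; the
# names below are hypothetical.
import time


def _wait_for_events_sketch(captured, event_type, n_events=1,
                            timeout=10.0):
    # Poll the list of captured notifications until enough matching
    # events have arrived; raise instead of returning a short result
    # when the timeout expires.
    deadline = time.time() + timeout
    while True:
        matches = [n for n in captured
                   if n.get('event_type') == event_type]
        if len(matches) >= n_events:
            return matches
        if time.time() >= deadline:
            raise AssertionError(
                'Timed out waiting for %d %s notification(s); got %d' %
                (n_events, event_type, len(matches)))
        time.sleep(0.1)
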
def _resume_server(self, server, expected_state='ACTIVE'):
    """Resume a server."""
    self.api.post_server_action(server['id'], {'resume': {}})
    fake_notifier.wait_for_versioned_notifications('instance.resume.end')
    return self._wait_for_state_change(server, expected_state)

def _suspend_server(self, server, expected_state='SUSPENDED'):
    """Suspend a server."""
    self.api.post_server_action(server['id'], {'suspend': {}})
    fake_notifier.wait_for_versioned_notifications('instance.suspend.end')
    return self._wait_for_state_change(server, expected_state)

def test_parallel_evacuate_with_server_group(self):
    self.skipTest('Skipped until bug 1763181 is fixed')
    group_req = {'name': 'a-name', 'policies': ['anti-affinity']}
    group = self.api.post_server_groups(group_req)

    # boot two instances with anti-affinity
    server = {'name': 'server',
              'imageRef': self.image_id,
              'flavorRef': self.flavor_id}
    hints = {'group': group['id']}
    created_server1 = self.api.post_server(
        {'server': server, 'os:scheduler_hints': hints})
    server1 = self._wait_for_state_change(
        self.api, created_server1, 'ACTIVE')

    created_server2 = self.api.post_server(
        {'server': server, 'os:scheduler_hints': hints})
    server2 = self._wait_for_state_change(
        self.api, created_server2, 'ACTIVE')

    # assert that the anti-affinity policy is enforced during the boot
    self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
                        server2['OS-EXT-SRV-ATTR:host'])

    # simulate compute failure on both compute hosts to allow evacuation
    self.compute1.stop()
    # force it down to avoid waiting for the service group to time out
    self.api.force_down_service('host1', 'nova-compute', True)

    self.compute2.stop()
    self.api.force_down_service('host2', 'nova-compute', True)

    # start a third compute to have room for one of the instances
    fake.set_nodes(['host3'])
    self.compute3 = self.start_service('compute', host='host3')

    # evacuate both instances
    post = {'evacuate': {}}
    self.api.post_server_action(server1['id'], post)
    self.api.post_server_action(server2['id'], post)

    # make sure that the rebuild is started and then finished
    # NOTE(mdbooth): We only get 1 rebuild.start notification here
    # because we validate server group policy (and therefore fail)
    # before emitting rebuild.start.
    fake_notifier.wait_for_versioned_notifications(
        'instance.rebuild.start', n_events=1)
    server1 = self._wait_for_server_parameter(
        self.api, server1, {'OS-EXT-STS:task_state': None})
    server2 = self._wait_for_server_parameter(
        self.api, server2, {'OS-EXT-STS:task_state': None})

    # NOTE(gibi): The instance.host is set _after_ the instance state
    # and task_state are set back to normal so it is not enough to wait
    # for that. The only thing that happens after the instance.host is
    # set to the target host is the migration status being set to done.
    # So we have to wait for that to avoid asserting the wrong host
    # below.
    self._wait_for_migration_status(server1, ['done', 'failed'])
    self._wait_for_migration_status(server2, ['done', 'failed'])

    # get the servers again to have the latest information about their
    # hosts
    server1 = self.api.get_server(server1['id'])
    server2 = self.api.get_server(server2['id'])

    # assert that the anti-affinity policy is enforced during the
    # evacuation
    self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
                        server2['OS-EXT-SRV-ATTR:host'])

    # assert that one of the evacuations was successful and that server
    # is moved to another host while the evacuation of the other server
    # failed
    if server1['status'] == 'ERROR':
        failed_server = server1
        evacuated_server = server2
    else:
        failed_server = server2
        evacuated_server = server1
    self.assertEqual('ERROR', failed_server['status'])
    self.assertNotEqual('host3', failed_server['OS-EXT-SRV-ATTR:host'])
    self.assertEqual('ACTIVE', evacuated_server['status'])
    self.assertEqual('host3', evacuated_server['OS-EXT-SRV-ATTR:host'])

def _wait_for_notification(self, event_type, timeout=10.0):
    notifications = fake_notifier.wait_for_versioned_notifications(
        event_type, timeout=timeout)
    self.assertTrue(
        len(notifications) > 0,
        'notification %s hasn\'t been received' % event_type)