Example #1
    def _evacuate_with_failure(self, server, compute1):
        # Perform an evacuation during which we experience a failure on the
        # destination host
        instance_uuid = server['id']

        with mock.patch.object(compute1.driver, 'plug_vifs') as plug_vifs:
            plug_vifs.side_effect = test.TestingException

            self.api.post_server_action(instance_uuid,
                                        {'evacuate': {
                                            'host': 'compute1'
                                        }})

            # Wait for the rebuild to start, then complete
            fake_notifier.wait_for_versioned_notifications(
                'instance.rebuild.start')
            self._wait_for_migration_status(server, ['failed'])
            server = self._wait_for_server_parameter(
                server, {'OS-EXT-STS:task_state': None})

            # Meta-test
            plug_vifs.assert_called()
            plug_vifs.reset_mock()

        # Return fresh server state after evacuate
        return server
Example #2
 def _rebuild_server(self, server, image_uuid, expected_state='ACTIVE'):
     """Rebuild a server."""
     self.api.post_server_action(
         server['id'], {'rebuild': {'imageRef': image_uuid}},
     )
     fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
     return self._wait_for_state_change(server, expected_state)
Example #3
 def _reboot_server(self, server, hard=False, expected_state='ACTIVE'):
     """Reboot a server."""
     self.api.post_server_action(
         server['id'], {'reboot': {'type': 'HARD' if hard else 'SOFT'}},
     )
     fake_notifier.wait_for_versioned_notifications('instance.reboot.end')
     return self._wait_for_state_change(server, expected_state)
Example #4
    def test_cache_image(self):
        """Test caching images by injecting the request directly to
        the conductor service and making sure it fans out and calls
        the expected nodes.
        """

        aggregate = objects.Aggregate(name='test',
                                      uuid=uuids.aggregate,
                                      id=1,
                                      hosts=['compute1', 'compute3',
                                             'compute4', 'compute5'])
        self.conductor.compute_task_mgr.cache_images(
            self.context, aggregate, ['an-image'])

        # NOTE(danms): We expect only three image cache attempts because
        # compute5 is marked as forced-down and compute2 is not in the
        # requested aggregate.
        for host in ['compute1', 'compute3', 'compute4']:
            mgr = getattr(self, host)
            self.assertEqual(set(['an-image']), mgr.driver.cached_images)
        for host in ['compute2', 'compute5']:
            mgr = getattr(self, host)
            self.assertEqual(set(), mgr.driver.cached_images)

        fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.start')
        fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.end')
Example #5
 def _attach_port(self, instance_uuid, port_id):
     self.api.attach_interface(
         instance_uuid, {'interfaceAttachment': {
             'port_id': port_id
         }})
     fake_notifier.wait_for_versioned_notifications(
         'instance.interface_attach.end')
Example #6
    def _delete_and_check_allocations(self, server):
        """Delete the instance and asserts that the allocations are cleaned

        :param server: The API representation of the instance to be deleted
        """

        self.api.delete_server(server['id'])
        self._wait_until_deleted(server)
        # NOTE(gibi): The resource allocation is deleted after the instance is
        # destroyed in the db so wait_until_deleted might return before the
        # resources are deleted in placement. So we need to wait for the
        # instance.delete.end notification as that is emitted after the
        # resources are freed.

        fake_notifier.wait_for_versioned_notifications('instance.delete.end')

        for rp_uuid in [self._get_provider_uuid_by_host(hostname)
                        for hostname in self.computes.keys()]:
            self.assertRequestMatchesUsage({'VCPU': 0,
                                            'MEMORY_MB': 0,
                                            'DISK_GB': 0}, rp_uuid)

        # and no allocations for the deleted server
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual(0, len(allocations))
Example #7
    def _delete_and_check_allocations(self, server):
        """Delete the instance and asserts that the allocations are cleaned

        :param server: The API representation of the instance to be deleted
        """

        self.api.delete_server(server['id'])
        self._wait_until_deleted(server)
        # NOTE(gibi): The resource allocation is deleted after the instance is
        # destroyed in the db so wait_until_deleted might return before the
        # resources are deleted in placement. So we need to wait for the
        # instance.delete.end notification as that is emitted after the
        # resources are freed.

        fake_notifier.wait_for_versioned_notifications('instance.delete.end')

        for rp_uuid in [
                self._get_provider_uuid_by_host(hostname)
                for hostname in self.computes.keys()
        ]:
            self.assertRequestMatchesUsage(
                {
                    'VCPU': 0,
                    'MEMORY_MB': 0,
                    'DISK_GB': 0
                }, rp_uuid)

        # and no allocations for the deleted server
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual(0, len(allocations))
Example #8
 def _resize_server(self, server, flavor_id):
     self.api.post_server_action(server['id'],
                                 {'resize': {
                                     'flavorRef': flavor_id
                                 }})
     fake_notifier.wait_for_versioned_notifications('instance.resize.end')
     return self._wait_for_state_change(server, 'VERIFY_RESIZE')
Example #9
 def _attach_volume_to_server(self, server_id, volume_id):
     """Attaches the volume to the server and waits for the
     "instance.volume_attach.end" versioned notification.
     """
     body = {'volumeAttachment': {'volumeId': volume_id}}
     self.api.api_post(
         '/servers/%s/os-volume_attachments' % server_id, body)
     fake_notifier.wait_for_versioned_notifications(
         'instance.volume_attach.end')
Example #10
    def test_restart_compute_while_instance_waiting_for_resource_claim(self):
        """Test for bug 1833581 where an instance is stuck in
        BUILD state forever because the compute service is restarted before
        the resource claim has finished.
        """

        # To reproduce the problem we need to stop / kill the compute service
        # when an instance build request has already reached the service but
        # the instance_claim() has not finished. One way that this
        # happens in practice is when multiple builds are waiting for the
        # 'nova-compute-resource' semaphore. So one way to reproduce this in
        # the test would be to grab that semaphore, boot an instance, wait for
        # it to reach the compute then stop the compute.
        # Unfortunately, when we release the semaphore after the simulated
        # compute restart, the original instance_claim execution continues as
        # the stopped compute is not 100% stopped in the func test env. Also,
        # we cannot really keep the semaphore forever as this named semaphore
        # is shared between the old and the new compute service.
        # There is another way to trigger the issue. We can inject a sleep into
        # instance_claim() to stop it. This is less realistic but it works in
        # the test env.
        server_req = self._build_minimal_create_server_request(
            self.api,
            'interrupted-server',
            flavor_id=self.flavor1['id'],
            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
            networks='none')

        def sleep_forever(*args, **kwargs):
            time.sleep(1000000)

        with mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                        'instance_claim') as mock_instance_claim:
            mock_instance_claim.side_effect = sleep_forever

            server = self.api.post_server({'server': server_req})
            self._wait_for_state_change(self.admin_api, server, 'BUILD')

            # the instance.create.start is the closest thing to the
            # instance_claim call we can wait for in the test
            fake_notifier.wait_for_versioned_notifications(
                'instance.create.start')
            self.restart_compute_service(self.compute1)

        # This is bug 1833581 as the server remains in BUILD state after the
        # compute restart.
        self._wait_for_state_change(self.admin_api, server, 'BUILD')

        # Not even the periodic task pushes this server to ERROR because the
        # server host is still None since the instance_claim didn't set it.
        self.flags(instance_build_timeout=1)
        self.compute1.manager._check_instance_build_time(
            nova_context.get_admin_context())
        server = self.admin_api.get_server(server['id'])
        self.assertEqual('BUILD', server['status'])
        self.assertIsNone(server['OS-EXT-SRV-ATTR:host'])
Example #11
    def test_migrate_reschedule_blocked_az_up_call(self):
        # We need to stub out the call to get_host_availability_zone to blow
        # up once we have gone to the compute service.
        original_prep_resize = compute_manager.ComputeManager._prep_resize
        self.rescheduled = None

        def wrap_prep_resize(_self, *args, **kwargs):
            # Poison the AZ query to blow up as if the cell conductor does not
            # have access to the API DB.
            self.agg_mock = self.useFixture(
                fixtures.MockPatch(
                    'nova.objects.AggregateList.get_by_host',
                    side_effect=oslo_db_exc.CantStartEngineError)).mock
            if self.rescheduled is None:
                # Track the first host that we rescheduled from.
                self.rescheduled = _self.host
                # Trigger a reschedule.
                raise exception.ComputeResourcesUnavailable(
                    reason='test_migrate_reschedule_blocked_az_up_call')
            return original_prep_resize(_self, *args, **kwargs)

        self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                      wrap_prep_resize)
        server = self._build_minimal_create_server_request(
            self.api, 'test_migrate_reschedule_blocked_az_up_call')
        server = self.api.post_server({'server': server})
        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
        original_host = server['OS-EXT-SRV-ATTR:host']

        # Now cold migrate the server to the other host.
        self.api.post_server_action(server['id'], {'migrate': None})

        # FIXME(mriedem): This is bug 1781286 where we reschedule from the
        # first selected host to conductor which will try to get the AZ for the
        # alternate host selection which will fail since it cannot access the
        # API DB.
        fake_notifier.wait_for_versioned_notifications(
            'compute_task.migrate_server.error')
        server = self._wait_for_server_parameter(
            self.api, server, {
                'status': 'ERROR',
                'OS-EXT-SRV-ATTR:host': original_host,
                'OS-EXT-STS:task_state': None
            })
        # Assert there is a fault injected on the server. This is a bit
        # annoying in that we would expect to see CantStartEngineError but,
        # because of how ComputeManager._reschedule_resize_or_reraise works,
        # the reschedule call to conductor is an RPC call, so that exception
        # comes back to compute, which injects a fault but then re-raises the
        # ComputeResourcesUnavailable exception; that gets recorded as the
        # most recent fault and is what shows up in the API. So instead we
        # assert that the reschedule happened and that the mocked method was
        # called.
        self.assertIn('Insufficient compute resources',
                      server['fault']['message'])
        self.assertIsNotNone(self.rescheduled)
        self.agg_mock.assert_called_once()
Example #12
    def _delete_and_check_allocations(self, server):
        """Delete the instance and asserts that the allocations are cleaned

        If the server was moved (resized or live migrated), also checks that
        migration-based allocations are also cleaned up.

        :param server: The API representation of the instance to be deleted
        :returns: The uuid of the migration record associated with the resize
            or cold migrate operation
        """
        # First check to see if there is a related migration record so we can
        # assert its allocations (if any) are not leaked.
        with utils.temporary_mutation(self.admin_api, microversion='2.59'):
            migrations = self.admin_api.api_get(
                '/os-migrations?instance_uuid=%s' %
                server['id']).body['migrations']

        if migrations:
            # If there is more than one migration, they are sorted by
            # created_at in descending order so we'll get the last one
            # which is probably what we'd always want anyway.
            migration_uuid = migrations[0]['uuid']
        else:
            migration_uuid = None

        self._delete_server(server)

        # NOTE(gibi): The resource allocation is deleted after the instance is
        # destroyed in the db so wait_until_deleted might return before the
        # resources are deleted in placement. So we need to wait for the
        # instance.delete.end notification as that is emitted after the
        # resources are freed.

        fake_notifier.wait_for_versioned_notifications('instance.delete.end')

        for rp_uuid in [
                self._get_provider_uuid_by_host(hostname)
                for hostname in self.computes.keys()
        ]:
            self.assertRequestMatchesUsage(
                {
                    'VCPU': 0,
                    'MEMORY_MB': 0,
                    'DISK_GB': 0
                }, rp_uuid)

        # and no allocations for the deleted server
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual(0, len(allocations))

        if migration_uuid:
            # and no allocations for the deleted migration
            allocations = self._get_allocations_by_server_uuid(migration_uuid)
            self.assertEqual(0, len(allocations))

        return migration_uuid
Example #13
 def _attach_interface(self, server, port_uuid):
     """attach a neutron port to a server."""
     body = {
         "interfaceAttachment": {
             "port_id": port_uuid
         }
     }
     attachment = self.api.attach_interface(server['id'], body)
     fake_notifier.wait_for_versioned_notifications(
         'instance.interface_attach.end')
     return attachment
Example #14
 def _revert_resize(self, server):
     self.api.post_server_action(server['id'], {'revertResize': None})
     server = self._wait_for_state_change(server, 'ACTIVE')
     self._wait_for_migration_status(server, ['reverted'])
     # Note that the migration status is changed to "reverted" in the
     # dest host revert_resize method but the allocations are cleaned up
     # in the source host finish_revert_resize method so we need to wait
     # for the finish_revert_resize method to complete.
     fake_notifier.wait_for_versioned_notifications(
         'instance.resize_revert.end')
     return server
Example #15
    def test_cache_image(self):
        """Test caching images by injecting the request directly to
        the conductor service and making sure it fans out and calls
        the expected nodes.
        """

        aggregate = objects.Aggregate(
            name='test',
            uuid=uuids.aggregate,
            id=1,
            hosts=['compute1', 'compute3', 'compute4', 'compute5'])
        self.conductor.compute_task_mgr.cache_images(self.context, aggregate,
                                                     ['an-image'])

        # NOTE(danms): We expect only three image cache attempts because
        # compute5 is marked as forced-down and compute2 is not in the
        # requested aggregate.
        for host in ['compute1', 'compute3', 'compute4']:
            mgr = getattr(self, host)
            self.assertEqual(set(['an-image']), mgr.driver.cached_images)
        for host in ['compute2', 'compute5']:
            mgr = getattr(self, host)
            self.assertEqual(set(), mgr.driver.cached_images)

        fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.start')

        progress = fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.progress', n_events=4)
        self.assertEqual(4, len(progress), progress)
        for notification in progress:
            payload = notification['payload']['nova_object.data']
            if payload['host'] == 'compute5':
                self.assertEqual(['an-image'], payload['images_failed'])
                self.assertEqual([], payload['images_cached'])
            else:
                self.assertEqual(['an-image'], payload['images_cached'])
                self.assertEqual([], payload['images_failed'])
            self.assertLessEqual(payload['index'], 4)
            self.assertGreater(payload['index'], 0)
            self.assertEqual(4, payload['total'])
            self.assertIn('conductor', notification['publisher_id'])

        fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.end')

        logtext = self.stdlog.logger.output

        self.assertIn(
            '3 cached, 0 existing, 0 errors, 0 unsupported, 1 skipped',
            logtext)
        self.assertNotIn('Image pre-cache operation for image an-image failed',
                         logtext)
Example #16
    def test_restart_compute_while_instance_waiting_for_resource_claim(self):
        """Test for bug 1833581 where an instance is stuck in
        BUILD state forever because the compute service is restarted before
        the resource claim has finished.
        """

        # To reproduce the problem we need to stop / kill the compute service
        # when an instance build request has already reached the service but
        # the instance_claim() has not finished. One way that this
        # happens in practice is when multiple builds are waiting for the
        # 'nova-compute-resource' semaphore. So one way to reproduce this in
        # the test would be to grab that semaphore, boot an instance, wait for
        # it to reach the compute then stop the compute.
        # Unfortunately, when we release the semaphore after the simulated
        # compute restart, the original instance_claim execution continues as
        # the stopped compute is not 100% stopped in the func test env. Also,
        # we cannot really keep the semaphore forever as this named semaphore
        # is shared between the old and the new compute service.
        # There is another way to trigger the issue. We can inject a sleep into
        # instance_claim() to stop it. This is less realistic but it works in
        # the test env.
        server_req = self._build_minimal_create_server_request(
            'interrupted-server',
            flavor_id=self.flavor1['id'],
            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
            networks='none')

        def sleep_forever(*args, **kwargs):
            time.sleep(1000000)

        with mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                        'instance_claim') as mock_instance_claim:
            mock_instance_claim.side_effect = sleep_forever

            server = self.api.post_server({'server': server_req})
            self._wait_for_state_change(server, 'BUILD')

            # the instance.create.start is the closest thing to the
            # instance_claim call we can wait for in the test
            fake_notifier.wait_for_versioned_notifications(
                'instance.create.start')

            with mock.patch('nova.compute.manager.LOG.debug') as mock_log:
                self.restart_compute_service(self.compute1)

        # We expect that the instance is pushed to ERROR state during the
        # compute restart.
        self._wait_for_state_change(server, 'ERROR')
        mock_log.assert_called_with(
            'Instance spawn was interrupted before instance_claim, setting '
            'instance to ERROR state',
            instance=mock.ANY)
Example #17
    def test_server_power_update(self):
        # This test checks the functionality of handling the "power-update"
        # external events.
        self.assertEqual(power_state.RUNNING,
                         self.server['OS-EXT-STS:power_state'])
        self.api.create_server_external_events(events=[self.power_off])
        expected_params = {
            'OS-EXT-STS:task_state': None,
            'OS-EXT-STS:vm_state': vm_states.STOPPED,
            'OS-EXT-STS:power_state': power_state.SHUTDOWN
        }
        server = self._wait_for_server_parameter(self.server, expected_params)
        msg = ' with target power state POWER_OFF.'
        self.assertIn(msg, self.stdlog.logger.output)
        # Test if this is logged in the instance action list.
        actions = self.api.get_instance_actions(server['id'])
        self.assertEqual(2, len(actions))
        acts = {action['action']: action for action in actions}
        self.assertEqual(['create', 'stop'], sorted(acts))
        stop_action = acts[instance_actions.STOP]
        detail = self.api.api_get(
            '/servers/%s/os-instance-actions/%s' %
            (server['id'], stop_action['request_id'])).body['instanceAction']
        events_by_name = {event['event']: event for event in detail['events']}
        self.assertEqual(1, len(detail['events']), detail)
        self.assertIn('compute_power_update', events_by_name)
        self.assertEqual('Success', detail['events'][0]['result'])
        # Test if notifications were emitted.
        fake_notifier.wait_for_versioned_notifications(
            'instance.power_off.start')
        fake_notifier.wait_for_versioned_notifications(
            'instance.power_off.end')

        # Checking POWER_ON
        self.api.create_server_external_events(events=[self.power_on])
        expected_params = {
            'OS-EXT-STS:task_state': None,
            'OS-EXT-STS:vm_state': vm_states.ACTIVE,
            'OS-EXT-STS:power_state': power_state.RUNNING
        }
        server = self._wait_for_server_parameter(self.server, expected_params)
        msg = ' with target power state POWER_ON.'
        self.assertIn(msg, self.stdlog.logger.output)
        # Test if this is logged in the instance action list.
        actions = self.api.get_instance_actions(server['id'])
        self.assertEqual(3, len(actions))
        acts = {action['action']: action for action in actions}
        self.assertEqual(['create', 'start', 'stop'], sorted(acts))
        start_action = acts[instance_actions.START]
        detail = self.api.api_get(
            '/servers/%s/os-instance-actions/%s' %
            (server['id'], start_action['request_id'])).body['instanceAction']
        events_by_name = {event['event']: event for event in detail['events']}
        self.assertEqual(1, len(detail['events']), detail)
        self.assertIn('compute_power_update', events_by_name)
        self.assertEqual('Success', detail['events'][0]['result'])
        # Test if notifications were emitted.
        fake_notifier.wait_for_versioned_notifications(
            'instance.power_on.start')
        fake_notifier.wait_for_versioned_notifications('instance.power_on.end')
Example #18
        def fake_rebuild(self_, context, instance, *args, **kwargs):
            # Simulate that the rebuild request of one of the instances
            # reaches the target compute manager significantly later so the
            # rebuild of the other instance can finish before the late
            # validation of the first rebuild.
            # We cannot simply delay the virt driver's rebuild or the
            # manager's _rebuild_default_impl as those run after the late
            # validation.
            if instance.host == 'host1':
                # wait for the other instance rebuild to start
                fake_notifier.wait_for_versioned_notifications(
                    'instance.rebuild.start', n_events=1)

            original_rebuild(self_, context, instance, *args, **kwargs)
Example #19
        def fake_rebuild(self_, context, instance, *args, **kwargs):
            # Simulate that the rebuild request of one of the instances
            # reaches the target compute manager significantly later so the
            # rebuild of the other instance can finish before the late
            # validation of the first rebuild.
            # We cannot simply delay the virt driver's rebuild or the
            # manager's _rebuild_default_impl as those run after the late
            # validation.
            if instance.host == 'host1':
                # wait for the other instance rebuild to start
                fake_notifier.wait_for_versioned_notifications(
                    'instance.rebuild.start', n_events=1)

            original_rebuild(self_, context, instance, *args, **kwargs)
Example #20
    def test_migrate_reschedule_blocked_az_up_call(self):
        self.flags(default_availability_zone='us-central')
        # We need to stub out the call to get_host_availability_zone to blow
        # up once we have gone to the compute service.
        original_prep_resize = compute_manager.ComputeManager._prep_resize
        self.rescheduled = None

        def wrap_prep_resize(_self, *args, **kwargs):
            # Poison the AZ query to blow up as if the cell conductor does not
            # have access to the API DB.
            self.agg_mock = self.useFixture(
                fixtures.MockPatch(
                    'nova.objects.AggregateList.get_by_host',
                    side_effect=oslo_db_exc.CantStartEngineError)).mock
            if self.rescheduled is None:
                # Track the first host that we rescheduled from.
                self.rescheduled = _self.host
                # Trigger a reschedule.
                raise exception.ComputeResourcesUnavailable(
                    reason='test_migrate_reschedule_blocked_az_up_call')
            return original_prep_resize(_self, *args, **kwargs)

        self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                      wrap_prep_resize)
        server = self._build_minimal_create_server_request(
            self.api, 'test_migrate_reschedule_blocked_az_up_call')
        server = self.api.post_server({'server': server})
        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
        original_host = server['OS-EXT-SRV-ATTR:host']

        # Now cold migrate the server to the other host.
        self.api.post_server_action(server['id'], {'migrate': None})
        # Because we poisoned AggregateList.get_by_host after hitting the
        # compute service we have to wait for the notification that the resize
        # is complete and then stop the mock so we can use the API again.
        fake_notifier.wait_for_versioned_notifications(
            'instance.resize_finish.end')
        # Note that we use stopall here because we actually called _prep_resize
        # twice so we have more than one instance of the mock that needs to be
        # stopped.
        mock.patch.stopall()
        server = self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')
        final_host = server['OS-EXT-SRV-ATTR:host']
        self.assertNotIn(final_host, [original_host, self.rescheduled])
        # We should have rescheduled and the instance AZ should be set from the
        # Selection object. Since neither compute host is in an AZ, the server
        # is in the default AZ from config.
        self.assertEqual('us-central', server['OS-EXT-AZ:availability_zone'])
        self.agg_mock.assert_not_called()
Example #21
    def test_wait_for_versioned_notifications(self):
        # Wait for a single notification which we emitted first
        self._generate_exception_notification()

        notifications = fake_notifier.wait_for_versioned_notifications(
                'compute.exception')
        self.assertEqual(1, len(notifications))
Example #22
    def test_wait_for_versioned_notifications(self):
        # Wait for a single notification which we emitted first
        self._generate_exception_notification()

        notifications = fake_notifier.wait_for_versioned_notifications(
            'compute.exception')
        self.assertEqual(1, len(notifications))
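As a follow-up to the two examples above, here is a hedged sketch of inspecting the notifications the helper returns. The test name is hypothetical, and the dict keys used ('event_type', 'payload', 'nova_object.data') are assumptions based on the payload access pattern shown in example #15, not something these snippets guarantee.

    def test_wait_returns_inspectable_notifications(self):
        # Hypothetical test sketch: assumes (per example #15) that each
        # returned notification is a dict carrying 'event_type' and a
        # versioned 'payload'.
        self._generate_exception_notification()
        notifications = fake_notifier.wait_for_versioned_notifications(
            'compute.exception')
        self.assertEqual('compute.exception',
                         notifications[0]['event_type'])
        self.assertIn('nova_object.data', notifications[0]['payload'])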
Example #23
    def test_aggregate_cache_images(self):
        aggregate_req = {
            "aggregate": {
                "name": "my-aggregate",
                "availability_zone": "nova"}}
        aggregate = self.admin_api.post_aggregate(aggregate_req)
        add_host_req = {
            "add_host": {
                "host": "compute"
            }
        }
        self.admin_api.post_aggregate_action(aggregate['id'], add_host_req)

        fake_notifier.reset()

        cache_images_req = {
            'cache': [
                {'id': '155d900f-4e14-4e4c-a73d-069cbf4541e6'}
            ]
        }
        self.admin_api.api_post('/os-aggregates/%s/images' % aggregate['id'],
                                cache_images_req)
        # Since the operation is asynchronous we have to wait for the end
        # notification.
        fake_notifier.wait_for_versioned_notifications(
            'aggregate.cache_images.end')

        self.assertEqual(3, len(fake_notifier.VERSIONED_NOTIFICATIONS),
                         fake_notifier.VERSIONED_NOTIFICATIONS)
        self._verify_notification(
            'aggregate-cache_images-start',
            replacements={
                'uuid': aggregate['uuid'],
                'id': aggregate['id']},
            actual=fake_notifier.VERSIONED_NOTIFICATIONS[0])
        self._verify_notification(
            'aggregate-cache_images-progress',
            replacements={
                'uuid': aggregate['uuid'],
                'id': aggregate['id']},
            actual=fake_notifier.VERSIONED_NOTIFICATIONS[1])
        self._verify_notification(
            'aggregate-cache_images-end',
            replacements={
                'uuid': aggregate['uuid'],
                'id': aggregate['id']},
            actual=fake_notifier.VERSIONED_NOTIFICATIONS[2])
Example #24
    def _delete_and_check_allocations(self, server):
        """Delete the instance and asserts that the allocations are cleaned

        If the server was moved (resized or live migrated), also checks that
        migration-based allocations are also cleaned up.

        :param server: The API representation of the instance to be deleted
        """

        # First check to see if there is a related migration record so we can
        # assert its allocations (if any) are not leaked.
        with utils.temporary_mutation(self.admin_api, microversion='2.59'):
            migrations = self.admin_api.api_get(
                '/os-migrations?instance_uuid=%s' %
                server['id']).body['migrations']
        if migrations:
            # If there is more than one migration, they are sorted by
            # created_at in descending order so we'll get the last one
            # which is probably what we'd always want anyway.
            migration_uuid = migrations[0]['uuid']
        else:
            migration_uuid = None

        self.api.delete_server(server['id'])
        self._wait_until_deleted(server)
        # NOTE(gibi): The resource allocation is deleted after the instance is
        # destroyed in the db so wait_until_deleted might return before the
        # resources are deleted in placement. So we need to wait for the
        # instance.delete.end notification as that is emitted after the
        # resources are freed.

        fake_notifier.wait_for_versioned_notifications('instance.delete.end')

        for rp_uuid in [self._get_provider_uuid_by_host(hostname)
                        for hostname in self.computes.keys()]:
            self.assertRequestMatchesUsage({'VCPU': 0,
                                            'MEMORY_MB': 0,
                                            'DISK_GB': 0}, rp_uuid)

        # and no allocations for the deleted server
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual(0, len(allocations))

        if migration_uuid:
            # and no allocations for the deleted migration
            allocations = self._get_allocations_by_server_uuid(migration_uuid)
            self.assertEqual(0, len(allocations))
Example #25
    def test_wait_for_versioned_notifications_too_many(self):
        # Wait for a single notification when there are 2 in the queue
        self._generate_exception_notification()
        self._generate_exception_notification()

        notifications = fake_notifier.wait_for_versioned_notifications(
            'compute.exception')
        self.assertEqual(2, len(notifications))
Example #26
    def test_wait_for_versioned_notifications_too_many(self):
        # Wait for a single notification when there are 2 in the queue
        self._generate_exception_notification()
        self._generate_exception_notification()

        notifications = fake_notifier.wait_for_versioned_notifications(
                'compute.exception')
        self.assertEqual(2, len(notifications))
Example #27
 def _assert_allocation_revert_on_fail(self, server):
     # Since this happens in MigrationTask.rollback in conductor, we need
     # to wait for something which happens after that, which is the
     # ComputeTaskManager._cold_migrate method sending the
     # compute_task.migrate_server.error event.
     fake_notifier.wait_for_versioned_notifications(
         'compute_task.migrate_server.error')
     mig_uuid = self.get_migration_uuid_for_instance(server['id'])
     mig_allocs = self._get_allocations_by_server_uuid(mig_uuid)
     self.assertEqual({}, mig_allocs)
     source_rp_uuid = self._get_provider_uuid_by_host(
         server['OS-EXT-SRV-ATTR:host'])
     server_allocs = self._get_allocations_by_server_uuid(server['id'])
     volume_backed = False if server['image'] else True
     self.assertFlavorMatchesAllocation(
         server['flavor'], server_allocs[source_rp_uuid]['resources'],
         volume_backed=volume_backed)
Example #28
    def test_unshelve_offloaded_fails_due_to_neutron(self):
        server = self._create_server(
            networks=[{'port': self.neutron.port_1['id']}],
            az='nova:host1')

        # with default config shelve means immediate offload as well
        req = {'shelve': {}}
        self.api.post_server_action(server['id'], req)
        self._wait_for_server_parameter(server, {
            'status': 'SHELVED_OFFLOADED',
            'OS-EXT-SRV-ATTR:host': None
        })
        allocations = self.placement_api.get('/allocations/%s' %
                                             server['id']).body['allocations']
        self.assertEqual(0, len(allocations))

        # disable the original host of the instance to force a port update
        # during unshelve
        source_service_id = self.api.get_services(
            host='host1', binary='nova-compute')[0]['id']
        self.api.put_service(source_service_id, {"status": "disabled"})

        # Simulate that the port update fails during unshelve because neutron
        # is unavailable
        with mock.patch('nova.tests.fixtures.NeutronFixture.'
                        'update_port') as mock_update_port:
            mock_update_port.side_effect = neutron_exception.ConnectionFailed(
                reason='test')
            req = {'unshelve': None}
            self.api.post_server_action(server['id'], req)
            fake_notifier.wait_for_versioned_notifications(
                'instance.unshelve.start')
            self._wait_for_server_parameter(
                server, {
                    'status': 'SHELVED_OFFLOADED',
                    'OS-EXT-STS:task_state': None,
                    'OS-EXT-SRV-ATTR:host': None
                })

        # As the instance went back to the offloaded state we expect no
        # allocations
        allocations = self.placement_api.get('/allocations/%s' %
                                             server['id']).body['allocations']
        self.assertEqual(0, len(allocations))
Example #29
    def test_rebuild_with_keypair(self):
        keypair_req = {
            'keypair': {
                'name': 'test-key1',
                'type': 'ssh',
            },
        }
        keypair1 = self.api.post_keypair(keypair_req)
        keypair_req['keypair']['name'] = 'test-key2'
        keypair2 = self.api.post_keypair(keypair_req)

        server = self._build_server(networks='none')
        server.update({'key_name': 'test-key1'})

        # Create a server with keypair 'test-key1'
        server = self.api.post_server({'server': server})
        self._wait_for_state_change(server, 'ACTIVE')

        # Check keypairs
        ctxt = context.get_admin_context()
        instance = objects.Instance.get_by_uuid(ctxt,
                                                server['id'],
                                                expected_attrs=['keypairs'])
        self.assertEqual(keypair1['public_key'],
                         instance.keypairs[0].public_key)

        # Rebuild a server with keypair 'test-key2'
        body = {
            'rebuild': {
                'imageRef': self.glance.auto_disk_config_enabled_image['id'],
                'key_name': 'test-key2',
            },
        }
        self.api.api_post('servers/%s/action' % server['id'], body)
        fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
        self._wait_for_state_change(server, 'ACTIVE')

        # Check keypairs changed
        instance = objects.Instance.get_by_uuid(ctxt,
                                                server['id'],
                                                expected_attrs=['keypairs'])
        self.assertEqual(keypair2['public_key'],
                         instance.keypairs[0].public_key)
Example #30
 def _wait_for_notifications(self, event_type, expected_count, timeout=1.0):
     notifications = fake_notifier.wait_for_versioned_notifications(
         event_type, n_events=expected_count, timeout=timeout)
     self.assertEqual(
         expected_count, len(notifications),
         'Unexpected number of %s notifications '
         'within the given timeout. '
         'Expected %d, got %d: %s' %
         (event_type, expected_count, len(notifications), notifications))
     return notifications
Example #31
    def test_server_create_reschedule_blocked_az_up_call(self):
        # We need to stub out the call to get_host_availability_zone to blow
        # up once we have gone to the compute service. With the way our
        # RPC/DB fixtures are set up it's non-trivial to try and separate a
        # superconductor from a cell conductor so we could configure the cell
        # conductor to not have access to the API DB, but that would be a
        # nice thing to have at some point.
        original_bari = compute_manager.ComputeManager.build_and_run_instance

        def wrap_bari(*args, **kwargs):
            # Poison the AZ query to blow up as if the cell conductor does not
            # have access to the API DB.
            self.useFixture(
                fixtures.MockPatch(
                    'nova.objects.AggregateList.get_by_host',
                    side_effect=oslo_db_exc.CantStartEngineError))
            return original_bari(*args, **kwargs)

        self.stub_out(
            'nova.compute.manager.ComputeManager.'
            'build_and_run_instance', wrap_bari)
        server = self._build_minimal_create_server_request(
            self.api, 'test_server_create_reschedule_blocked_az_up_call')
        server = self.api.post_server({'server': server})
        # FIXME(mriedem): This is bug 1781286 where we reschedule from the
        # first failed host to conductor which will try to get the AZ for the
        # alternate host selection which will fail since it cannot access the
        # API DB.
        # Note that we have to wait for the notification before calling the API
        # to avoid a race where instance.host is not None and the API tries to
        # hit the AggregateList.get_by_host method we mocked to fail.
        fake_notifier.wait_for_versioned_notifications(
            'compute_task.build_instances.error')
        server = self._wait_for_server_parameter(
            self.api, server, {
                'status': 'ERROR',
                'OS-EXT-SRV-ATTR:host': None,
                'OS-EXT-STS:task_state': None
            })
        # Assert there is a fault injected on the server for the error we
        # expect.
        self.assertIn('CantStartEngineError', server['fault']['message'])
Example #32
    def _wait_for_notifications(self, event_type, expected_count,
                                timeout=10.0):
        notifications = fake_notifier.wait_for_versioned_notifications(
                event_type, n_events=expected_count, timeout=timeout)
        msg = ''.join('\n%s' % notif for notif in notifications)

        self.assertEqual(expected_count, len(notifications),
                         'Unexpected number of %s notifications '
                         'within the given timeout. '
                         'Expected %d, got %d: %s' %
                         (event_type, expected_count, len(notifications), msg))
        return notifications
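A hedged usage sketch for the wrapper defined above; the surrounding test name, the event type, and the expected count are illustrative assumptions rather than values taken from any of these examples.

    def test_wait_for_two_notifications_sketch(self):
        # Hypothetical call site for _wait_for_notifications above; the
        # event type and the expected count are assumptions for illustration
        # only.
        notifications = self._wait_for_notifications(
            'instance.create.end', expected_count=2)
        self.assertEqual(2, len(notifications))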
Example #33
    def _evacuate_with_failure(self, server, compute1):
        # Perform an evacuation during which we experience a failure on the
        # destination host

        with mock.patch.object(compute1.driver, 'plug_vifs') as plug_vifs:
            plug_vifs.side_effect = test.TestingException

            server = self._evacuate_server(server, {'host': 'compute1'},
                                           expected_state='ERROR',
                                           expected_task_state=None,
                                           expected_migration_status='failed')

            # Wait for the rebuild to start, then complete
            fake_notifier.wait_for_versioned_notifications(
                'instance.rebuild.start')

            # Meta-test
            plug_vifs.assert_called()
            plug_vifs.reset_mock()

        # Return fresh server state after evacuate
        return server
Example #34
    def test_server_create_reschedule_blocked_az_up_call(self):
        self.flags(default_availability_zone='us-central')
        # We need to stub out the call to get_host_availability_zone to blow
        # up once we have gone to the compute service. With the way our
        # RPC/DB fixtures are set up it's non-trivial to try and separate a
        # superconductor from a cell conductor so we could configure the cell
        # conductor to not have access to the API DB, but that would be a
        # nice thing to have at some point.
        original_bari = compute_manager.ComputeManager.build_and_run_instance

        def wrap_bari(*args, **kwargs):
            # Poison the AZ query to blow up as if the cell conductor does not
            # have access to the API DB.
            self.useFixture(
                fixtures.MockPatch(
                    'nova.objects.AggregateList.get_by_host',
                    side_effect=oslo_db_exc.CantStartEngineError))
            return original_bari(*args, **kwargs)

        self.stub_out(
            'nova.compute.manager.ComputeManager.'
            'build_and_run_instance', wrap_bari)
        server = self._build_minimal_create_server_request(
            'test_server_create_reschedule_blocked_az_up_call')
        server = self.api.post_server({'server': server})
        # Because we poisoned AggregateList.get_by_host after hitting the
        # compute service we have to wait for the notification that the build
        # is complete and then stop the mock so we can use the API again.
        fake_notifier.wait_for_versioned_notifications('instance.create.end')
        # Note that we use stopall here because we actually called
        # build_and_run_instance twice so we have more than one instance of
        # the mock that needs to be stopped.
        mock.patch.stopall()
        server = self._wait_for_state_change(server, 'ACTIVE')
        # We should have rescheduled and the instance AZ should be set from the
        # Selection object. Since neither compute host is in an AZ, the server
        # is in the default AZ from config.
        self.assertEqual('us-central', server['OS-EXT-AZ:availability_zone'])
Example #35
 def _wait_for_notification(self, event_type, timeout=10.0):
     # NOTE(mdbooth): wait_for_versioned_notifications raises an exception
     # if it times out since change I017d1a31. Consider removing this
     # method.
     fake_notifier.wait_for_versioned_notifications(
         event_type, timeout=timeout)
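Given the NOTE above that wait_for_versioned_notifications raises if it times out, a test could skip the wrapper and wait directly. A minimal sketch follows; the test name and the event type are assumed examples, not taken from these snippets.

    def test_direct_wait_sketch(self):
        # Hypothetical test: rely on wait_for_versioned_notifications raising
        # on timeout (see the NOTE above) instead of wrapping it. The event
        # type is an assumed example.
        fake_notifier.wait_for_versioned_notifications(
            'instance.create.end', timeout=10.0)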
Example #36
 def _resume_server(self, server, expected_state='ACTIVE'):
     """Resume a server."""
     self.api.post_server_action(server['id'], {'resume': {}})
     fake_notifier.wait_for_versioned_notifications('instance.resume.end')
     return self._wait_for_state_change(server, expected_state)
Example #37
 def _suspend_server(self, server, expected_state='SUSPENDED'):
     """Suspend a server."""
     self.api.post_server_action(server['id'], {'suspend': {}})
     fake_notifier.wait_for_versioned_notifications('instance.suspend.end')
     return self._wait_for_state_change(server, expected_state)
Example #38
    def test_parallel_evacuate_with_server_group(self):
        self.skipTest('Skipped until bug 1763181 is fixed')
        group_req = {'name': 'a-name', 'policies': ['anti-affinity']}
        group = self.api.post_server_groups(group_req)

        # boot two instances with anti-affinity
        server = {'name': 'server',
                  'imageRef': self.image_id,
                  'flavorRef': self.flavor_id}
        hints = {'group': group['id']}
        created_server1 = self.api.post_server({'server': server,
                                                'os:scheduler_hints': hints})
        server1 = self._wait_for_state_change(self.api,
                                              created_server1, 'ACTIVE')

        created_server2 = self.api.post_server({'server': server,
                                                'os:scheduler_hints': hints})
        server2 = self._wait_for_state_change(self.api,
                                              created_server2, 'ACTIVE')

        # assert that the anti-affinity policy is enforced during the boot
        self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
                            server2['OS-EXT-SRV-ATTR:host'])

        # simulate compute failure on both compute hosts to allow evacuation
        self.compute1.stop()
        # force it down to avoid waiting for the service group to time out
        self.api.force_down_service('host1', 'nova-compute', True)

        self.compute2.stop()
        self.api.force_down_service('host2', 'nova-compute', True)

        # start a third compute to have place for one of the instances
        fake.set_nodes(['host3'])
        self.compute3 = self.start_service('compute', host='host3')

        # evacuate both instances
        post = {'evacuate': {}}
        self.api.post_server_action(server1['id'], post)
        self.api.post_server_action(server2['id'], post)

        # make sure that the rebuild is started and then finished
        # NOTE(mdbooth): We only get 1 rebuild.start notification here because
        # we validate server group policy (and therefore fail) before emitting
        # rebuild.start.
        fake_notifier.wait_for_versioned_notifications(
            'instance.rebuild.start', n_events=1)
        server1 = self._wait_for_server_parameter(
            self.api, server1, {'OS-EXT-STS:task_state': None})
        server2 = self._wait_for_server_parameter(
            self.api, server2, {'OS-EXT-STS:task_state': None})

        # NOTE(gibi): The instance.host is set _after_ the instance state and
        # task_state are set back to normal so it is not enough to wait for
        # that. The only thing that happens after the instance.host is set to
        # the target host is the migration status changing to 'done'. So we
        # have to wait for that to avoid asserting the wrong host below.
        self._wait_for_migration_status(server1, ['done', 'failed'])
        self._wait_for_migration_status(server2, ['done', 'failed'])

        # get the servers again to have the latest information about their
        # hosts
        server1 = self.api.get_server(server1['id'])
        server2 = self.api.get_server(server2['id'])

        # assert that the anti-affinity policy is enforced during the
        # evacuation
        self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
                            server2['OS-EXT-SRV-ATTR:host'])

        # assert that one of the evacuations was successful and that server
        # was moved to another host while the evacuation of the other server
        # failed
        if server1['status'] == 'ERROR':
            failed_server = server1
            evacuated_server = server2
        else:
            failed_server = server2
            evacuated_server = server1
        self.assertEqual('ERROR', failed_server['status'])
        self.assertNotEqual('host3', failed_server['OS-EXT-SRV-ATTR:host'])
        self.assertEqual('ACTIVE', evacuated_server['status'])
        self.assertEqual('host3', evacuated_server['OS-EXT-SRV-ATTR:host'])
Example #39
 def _wait_for_notification(self, event_type, timeout=10.0):
     notifications = fake_notifier.wait_for_versioned_notifications(
         event_type, timeout=timeout)
     self.assertTrue(
         len(notifications) > 0,
         'notification %s hasn\'t been received' % event_type)