def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
    """Assert the DB query runs inside the target_cell context scope.

    Verifies not only that get_by_filters is called with the targeted
    cell context, but that the call happens *before* the target_cell
    context manager exits.
    """
    ctxt = context.get_context()
    mapping = objects.CellMapping(database_connection='fake://db',
                                  transport_url='fake://mq',
                                  uuid=uuids.cell)
    mappings = objects.CellMappingList(objects=[mapping])

    # Use a mock manager to assert call order across mocks.
    manager = mock.Mock()
    manager.attach_mock(mock_get_inst, 'get_inst')
    manager.attach_mock(mock_target_cell, 'target_cell')

    filters = {'deleted': False}
    context.scatter_gather_cells(
        ctxt, mappings, 60, objects.InstanceList.get_by_filters, filters,
        sort_dir='foo')

    # NOTE(melwitt): This only works without the SpawnIsSynchronous fixture
    # because when the spawn is treated as synchronous and the thread
    # function is called immediately, it will occur inside the target_cell
    # context manager scope when it wouldn't with a real spawn.

    # Assert that InstanceList.get_by_filters was called before the
    # target_cell context manager exited.
    get_inst_call = mock.call.get_inst(
        mock_target_cell.return_value.__enter__.return_value, filters,
        sort_dir='foo')
    expected_calls = [get_inst_call,
                      mock.call.target_cell().__exit__(None, None, None)]
    manager.assert_has_calls(expected_calls)
def _get_instance_bdms_in_multiple_cells(ctxt, instance_uuids):
    """Fetch block device mappings for instances spread across cells.

    :param ctxt: The request context.
    :param instance_uuids: List of instance UUIDs to look up BDMs for.
    :returns: Dict, keyed by instance uuid, of BlockDeviceMappingList
        results gathered from each instance's cell. Cells that failed or
        timed out are logged and omitted from the result.
    """
    inst_maps = objects.InstanceMappingList.get_by_instance_uuids(
        ctxt, instance_uuids)

    # Collect the distinct set of cells the instances live in, keyed by
    # cell uuid so each cell is only queried once.
    cell_mappings = {}
    for inst_map in inst_maps:
        if (inst_map.cell_mapping is not None and
                inst_map.cell_mapping.uuid not in cell_mappings):
            cell_mappings.update(
                {inst_map.cell_mapping.uuid: inst_map.cell_mapping})

    bdms = {}
    results = nova_context.scatter_gather_cells(
        ctxt, cell_mappings.values(), nova_context.CELL_TIMEOUT,
        objects.BlockDeviceMappingList.bdms_by_instance_uuid,
        instance_uuids)
    for cell_uuid, result in results.items():
        # A raised exception or the did-not-respond sentinel means that
        # cell contributes nothing; the merged result is best-effort.
        if isinstance(result, Exception):
            LOG.warning('Failed to get block device mappings for cell %s',
                        cell_uuid)
        elif result is nova_context.did_not_respond_sentinel:
            LOG.warning('Timeout getting block device mappings for cell '
                        '%s', cell_uuid)
        else:
            bdms.update(result)
    return bdms
def test_scatter_gather_cells_exception(self, mock_get_inst,
                                        mock_log_exception):
    """A cell raising puts the raised_exception_sentinel in the results."""
    # This is needed because we're mocking get_by_filters.
    self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())
    ctxt = context.get_context()
    mapping0 = objects.CellMapping(database_connection='fake://db0',
                                   transport_url='none:///',
                                   uuid=objects.CellMapping.CELL0_UUID)
    mapping1 = objects.CellMapping(database_connection='fake://db1',
                                   transport_url='fake://mq1',
                                   uuid=uuids.cell1)
    mappings = objects.CellMappingList(objects=[mapping0, mapping1])

    # Simulate cell1 raising an exception: the first call (cell0)
    # succeeds, the second (cell1) raises.
    mock_get_inst.side_effect = [mock.sentinel.instances,
                                 test.TestingException()]

    results = context.scatter_gather_cells(
        ctxt, mappings, 30, objects.InstanceList.get_by_filters)
    self.assertEqual(2, len(results))
    self.assertIn(mock.sentinel.instances, results.values())
    self.assertIn(context.raised_exception_sentinel, results.values())
    # The failure should also have been logged.
    self.assertTrue(mock_log_exception.called)
def test_scatter_gather_cells_timeout(self, mock_get_inst,
                                      mock_get_result, mock_timeout,
                                      mock_log_warning):
    """A cell that times out yields the did_not_respond_sentinel."""
    # This is needed because we're mocking get_by_filters.
    self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())
    ctxt = context.get_context()
    mapping0 = objects.CellMapping(database_connection='fake://db0',
                                   transport_url='none:///',
                                   uuid=objects.CellMapping.CELL0_UUID)
    mapping1 = objects.CellMapping(database_connection='fake://db1',
                                   transport_url='fake://mq1',
                                   uuid=uuids.cell1)
    mappings = objects.CellMappingList(objects=[mapping0, mapping1])

    # Simulate cell1 not responding: the first gathered result is from
    # cell0, then the gather raises CellTimeout.
    mock_get_result.side_effect = [(mapping0.uuid,
                                    mock.sentinel.instances),
                                   exception.CellTimeout()]

    results = context.scatter_gather_cells(
        ctxt, mappings, 30, objects.InstanceList.get_by_filters)
    self.assertEqual(2, len(results))
    self.assertIn(mock.sentinel.instances, results.values())
    self.assertIn(context.did_not_respond_sentinel, results.values())
    # The timeout object should have been armed with the given timeout.
    mock_timeout.assert_called_once_with(30, exception.CellTimeout)
    self.assertTrue(mock_log_warning.called)
def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
    """The DB query is invoked with the targeted cell context."""
    # Run spawns inline so the query has happened by the time we assert.
    self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())

    request_context = context.get_context()
    cell = objects.CellMapping(database_connection='fake://db',
                               transport_url='fake://mq',
                               uuid=uuids.cell)
    cell_list = objects.CellMappingList(objects=[cell])
    query_filters = {'deleted': False}

    context.scatter_gather_cells(
        request_context, cell_list, 60,
        objects.InstanceList.get_by_filters, query_filters,
        sort_dir='foo')

    # The context passed to the query function must be the one yielded
    # by the target_cell context manager.
    targeted_context = mock_target_cell.return_value.__enter__.return_value
    mock_get_inst.assert_called_once_with(
        targeted_context, query_filters, sort_dir='foo')
def test_scatter_gather_cells_all_timeout(self, mock_get_inst, mock_get_result, mock_timeout, mock_log_warning): """This is a regression test for bug 1847131. test_scatter_gather_cells_timeout did not catch the issue because it yields a result which sets the cell_uuid variable in scope before the CellTimeout is processed and logged. In this test we only raise the CellTimeout so cell_uuid will not be in scope for the log message. """ # This is needed because we're mocking get_by_filters. self.useFixture(nova_fixtures.SpawnIsSynchronousFixture()) ctxt = context.get_context() mapping0 = objects.CellMapping(database_connection='fake://db0', transport_url='none:///', uuid=objects.CellMapping.CELL0_UUID) mappings = objects.CellMappingList(objects=[mapping0]) # Simulate cell0 not responding. mock_get_result.side_effect = exception.CellTimeout() results = context.scatter_gather_cells( ctxt, mappings, 30, objects.InstanceList.get_by_filters, {}) self.assertEqual(1, len(results)) self.assertIn(context.did_not_respond_sentinel, results.values()) mock_timeout.assert_called_once_with(30, exception.CellTimeout) mock_log_warning.assert_called_once_with( 'Timed out waiting for response from cell', exc_info=True)
def test_scatter_gather_cells(self): self._create_cell_mappings() # Create an instance in cell0 with context.target_cell(self.context, self.mapping0) as cctxt: instance = objects.Instance(context=cctxt, uuid=uuids.instance0, project_id='fake-project') instance.create() # Create an instance in first cell with context.target_cell(self.context, self.mapping1) as cctxt: instance = objects.Instance(context=cctxt, uuid=uuids.instance1, project_id='fake-project') instance.create() # Create an instance in second cell with context.target_cell(self.context, self.mapping2) as cctxt: instance = objects.Instance(context=cctxt, uuid=uuids.instance2, project_id='fake-project') instance.create() filters = {'deleted': False, 'project_id': 'fake-project'} results = context.scatter_gather_all_cells( self.context, objects.InstanceList.get_by_filters, filters, sort_dir='asc') instances = objects.InstanceList() for result in results.values(): instances = instances + result # Should have 3 instances across cells self.assertEqual(3, len(instances)) # Verify we skip cell0 when specified results = context.scatter_gather_skip_cell0( self.context, objects.InstanceList.get_by_filters, filters) instances = objects.InstanceList() for result in results.values(): instances = instances + result # Should have gotten only the instances from the last two cells self.assertEqual(2, len(instances)) self.assertIn(self.mapping1.uuid, results) self.assertIn(self.mapping2.uuid, results) instance_uuids = [inst.uuid for inst in instances] self.assertIn(uuids.instance1, instance_uuids) self.assertIn(uuids.instance2, instance_uuids) # Try passing one cell results = context.scatter_gather_cells( self.context, [self.mapping1], 60, objects.InstanceList.get_by_filters, filters) instances = objects.InstanceList() for result in results.values(): instances = instances + result # Should have gotten only one instance from cell1 self.assertEqual(1, len(instances)) self.assertIn(self.mapping1.uuid, results) 
self.assertEqual(uuids.instance1, instances[0].uuid)
def test_scatter_gather_cells_exception(self, mock_get_inst,
                                        mock_log_exception):
    """Exceptions from a cell appear in the results keyed by cell uuid.

    Also verifies the logging policy: non-NovaException errors are
    logged, NovaExceptions are left for the caller to handle.
    """
    # This is needed because we're mocking get_by_filters.
    self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())
    ctxt = context.get_context()
    mapping0 = objects.CellMapping(database_connection='fake://db0',
                                   transport_url='none:///',
                                   uuid=objects.CellMapping.CELL0_UUID)
    mapping1 = objects.CellMapping(database_connection='fake://db1',
                                   transport_url='fake://mq1',
                                   uuid=uuids.cell1)
    mappings = objects.CellMappingList(objects=[mapping0, mapping1])

    # Simulate cell1 raising an exception.
    mock_get_inst.side_effect = [mock.sentinel.instances,
                                 test.TestingException()]

    filters = {'deleted': False}
    results = context.scatter_gather_cells(
        ctxt, mappings, 30, objects.InstanceList.get_by_filters, filters)
    self.assertEqual(2, len(results))
    self.assertIn(mock.sentinel.instances, results.values())
    self.assertIsInstance(results[mapping1.uuid], Exception)
    # non-NovaException gets logged
    self.assertTrue(mock_log_exception.called)

    # Now run it again with a NovaException to see it's not logged.
    mock_log_exception.reset_mock()
    mock_get_inst.side_effect = [mock.sentinel.instances,
                                 exception.NotFound()]

    results = context.scatter_gather_cells(
        ctxt, mappings, 30, objects.InstanceList.get_by_filters, filters)
    self.assertEqual(2, len(results))
    self.assertIn(mock.sentinel.instances, results.values())
    self.assertIsInstance(results[mapping1.uuid], exception.NovaException)
    # NovaExceptions are not logged, the caller should handle them.
    mock_log_exception.assert_not_called()
def get_compute_nodes_by_host_or_node(self, ctxt, host, node, cell=None):
    '''Get compute nodes from given host or node

    :param ctxt: The request context.
    :param host: Host name to look up; may be None if node is given.
    :param node: Node name to look up; may be None if host is given.
    :param cell: Optional CellMapping to restrict the search to one cell.
        If not given and host is set, the cell is resolved from the host
        mapping; otherwise all enabled cells are queried.
    :returns: ComputeNodeList of the matching compute nodes; empty list
        when nothing is found.
    '''
    # Decorator that converts a NotFound from the wrapped lookup into an
    # empty ComputeNodeList so all three lookups behave uniformly.
    def return_empty_list_for_not_found(func):
        def wrapper(*args, **kwargs):
            try:
                ret = func(*args, **kwargs)
            except exception.NotFound:
                ret = objects.ComputeNodeList()
            return ret
        return wrapper

    @return_empty_list_for_not_found
    def _get_by_host_and_node(ctxt):
        compute_node = objects.ComputeNode.get_by_host_and_nodename(
            ctxt, host, node)
        return objects.ComputeNodeList(objects=[compute_node])

    @return_empty_list_for_not_found
    def _get_by_host(ctxt):
        return objects.ComputeNodeList.get_all_by_host(ctxt, host)

    @return_empty_list_for_not_found
    def _get_by_node(ctxt):
        compute_node = objects.ComputeNode.get_by_nodename(ctxt, node)
        return objects.ComputeNodeList(objects=[compute_node])

    # Pick the lookup strategy based on which identifiers were provided.
    if host and node:
        target_fnc = _get_by_host_and_node
    elif host:
        target_fnc = _get_by_host
    else:
        target_fnc = _get_by_node

    if host and not cell:
        # optimization not to issue queries to every cell DB
        cell = self._get_cell_by_host(ctxt, host)

    cells = [cell] if cell else self.enabled_cells

    timeout = context_module.CELL_TIMEOUT
    nodes_by_cell = context_module.scatter_gather_cells(
        ctxt, cells, timeout, target_fnc)

    # Only one cell should have values for the compute nodes
    # so we get them here, or return an empty list if no cell
    # has a value; be sure to filter out cell failures.
    nodes = next(
        (nodes for nodes in nodes_by_cell.values()
         if nodes and not context_module.is_cell_failure_sentinel(nodes)),
        objects.ComputeNodeList())

    return nodes
def _get_computes_for_cells(self, context, cells, compute_uuids=None):
    """Get a tuple of compute node and service information.

    :param context: request context
    :param cells: list of CellMapping objects
    :param compute_uuids: list of ComputeNode UUIDs. If this is None, all
        compute nodes from each specified cell will be returned, otherwise
        only the ComputeNode objects with a UUID in the list of UUIDs in
        any given cell is returned. If this is an empty list, the returned
        compute_nodes tuple item will be an empty dict.

    Returns a tuple (compute_nodes, services) where:
        - compute_nodes is cell-uuid keyed dict of compute node lists
        - services is a dict of services indexed by hostname
    """
    # Runs inside each targeted cell: fetch all nova-compute services
    # (including disabled ones) plus the requested compute nodes.
    def targeted_operation(cctxt):
        services = objects.ServiceList.get_by_binary(
            cctxt, 'nova-compute', include_disabled=True)
        if compute_uuids is None:
            return services, objects.ComputeNodeList.get_all(cctxt)
        else:
            return services, objects.ComputeNodeList.get_all_by_uuids(
                cctxt, compute_uuids)

    timeout = context_module.CELL_TIMEOUT
    results = context_module.scatter_gather_cells(context, cells, timeout,
                                                  targeted_operation)
    compute_nodes = collections.defaultdict(list)
    services = {}
    for cell_uuid, result in results.items():
        # Skip cells that raised or timed out; the aggregate result is
        # best-effort over the responsive cells.
        if isinstance(result, Exception):
            LOG.warning('Failed to get computes for cell %s', cell_uuid)
        elif result is context_module.did_not_respond_sentinel:
            LOG.warning('Timeout getting computes for cell %s', cell_uuid)
        else:
            _services, _compute_nodes = result
            compute_nodes[cell_uuid].extend(_compute_nodes)
            # Last cell wins on hostname collisions across cells.
            services.update(
                {service.host: service for service in _services})
    return compute_nodes, services
def get_records_sorted(self, ctx, filters, limit, marker, **kwargs):
    """Get a cross-cell list of records matching filters.

    This iterates cells in parallel generating a unified and sorted list
    of records as efficiently as possible. It takes care to iterate the
    list as infrequently as possible. We wrap the results in RecordWrapper
    objects so that they are sortable by heapq.merge(), which requires
    that the '<' operator just works.

    Our sorting requirements are encapsulated into the RecordSortContext
    provided to the constructor for this object.

    This function is a generator of records from the database like what
    you would get from instance_get_all_by_filters_sort() in the DB API.

    NOTE: Since we do these in parallel, a nonzero limit will be passed
    to each database query, although the limit will be enforced in the
    output of this function. Meaning, we will still query $limit from
    each database, but only return $limit total results.

    :param ctx: request context
    :param filters: query filters passed through to get_by_filters()
    :param limit: maximum total number of records to yield, or None for
        unlimited
    :param marker: identifier of the last record of the previous page,
        or None for the first page
    :raises: exception.NovaException when a cell fails and configuration
        indicates down cells must not be skipped
    """
    if marker:
        # A marker identifier was provided from the API. Call this
        # the 'global' marker as it determines where we start the
        # process across all cells. Look up the record in
        # whatever cell it is in and record the values for the
        # sort keys so we can find the marker instance in each
        # cell (called the 'local' marker).
        global_marker_cell, global_marker_record = self.get_marker_record(
            ctx, marker)
        global_marker_values = [global_marker_record[key]
                                for key in self.sort_ctx.sort_keys]

    def do_query(cctx):
        """Generate RecordWrapper(record) objects from a cell.

        We do this inside the thread (created by
        scatter_gather_all_cells()) so that we return wrappers and
        avoid having to iterate the combined result list in the
        caller again. This is run against each cell by the
        scatter_gather routine.
        """

        # The local marker is an identifier of a record in a cell
        # that is found by the special method
        # get_marker_by_values(). It should be the next record
        # in order according to the sort provided, but after the
        # marker instance which may have been in another cell.
        local_marker = None

        # Since the regular DB query routines take a marker and assume that
        # the marked record was the last entry of the previous page, we
        # may need to prefix it to our result query if we're not the cell
        # that had the actual marker record.
        local_marker_prefix = []

        marker_id = self.marker_identifier

        if marker:
            if cctx.cell_uuid == global_marker_cell:
                local_marker = marker
            else:
                local_marker = self.get_marker_by_values(
                    cctx, global_marker_values)
            if local_marker:
                if local_marker != marker:
                    # We did find a marker in our cell, but it wasn't
                    # the global marker. Thus, we will use it as our
                    # marker in the main query below, but we also need
                    # to prefix that result with this marker instance
                    # since the result below will not return it and it
                    # has not been returned to the user yet. Note that
                    # we do _not_ prefix the marker instance if our
                    # marker was the global one since that has already
                    # been sent to the user.
                    local_marker_filters = copy.copy(filters)
                    if marker_id not in local_marker_filters:
                        # If an $id filter was provided, it will
                        # have included our marker already if this
                        # instance is desired in the output
                        # set. If it wasn't, we specifically query
                        # for it. If the other filters would have
                        # excluded it, then we'll get an empty set
                        # here and not include it in the output as
                        # expected.
                        local_marker_filters[marker_id] = [local_marker]
                    local_marker_prefix = self.get_by_filters(
                        cctx, local_marker_filters, limit=1, marker=None,
                        **kwargs)
            else:
                # There was a global marker but everything in our
                # cell is _before_ that marker, so we return
                # nothing. If we didn't have this clause, we'd
                # pass marker=None to the query below and return a
                # full unpaginated set for our cell.
                return

        if local_marker_prefix:
            # Per above, if we had a matching marker object, that is
            # the first result we should generate.
            yield RecordWrapper(cctx, self.sort_ctx,
                                local_marker_prefix[0])

        # If a batch size was provided, use that as the limit per
        # batch. If not, then ask for the entire $limit in a single
        # batch.
        batch_size = self.batch_size or limit

        # Keep track of how many we have returned in all batches
        return_count = 0

        # If limit was unlimited then keep querying batches until
        # we run out of results. Otherwise, query until the total count
        # we have returned exceeds the limit.
        while limit is None or return_count < limit:
            batch_count = 0

            # Do not query a full batch if it would cause our total
            # to exceed the limit
            if limit:
                query_size = min(batch_size, limit - return_count)
            else:
                query_size = batch_size

            # Get one batch
            query_result = self.get_by_filters(
                cctx, filters,
                limit=query_size or None, marker=local_marker,
                **kwargs)

            # Yield wrapped results from the batch, counting as we go
            # (to avoid traversing the list to count). Also, update our
            # local_marker each time so that local_marker is the end of
            # this batch in order to find the next batch.
            for item in query_result:
                local_marker = item[self.marker_identifier]
                yield RecordWrapper(cctx, self.sort_ctx, item)
                batch_count += 1

            # No results means we are done for this cell
            if not batch_count:
                break

            return_count += batch_count
            LOG.debug(('Listed batch of %(batch)i results from cell '
                       'out of %(limit)s limit. Returned %(total)i '
                       'total so far.'),
                      {'batch': batch_count,
                       'total': return_count,
                       'limit': limit or 'no'})

    # NOTE(danms): The calls to do_query() will return immediately
    # with a generator. There is no point in us checking the
    # results for failure or timeout since we have not actually
    # run any code in do_query() until the first iteration
    # below. The query_wrapper() utility handles inline
    # translation of failures and timeouts to sentinels which will
    # be generated and consumed just like any normal result below.
    if self.cells:
        results = context.scatter_gather_cells(ctx, self.cells,
                                               context.CELL_TIMEOUT,
                                               query_wrapper, do_query)
    else:
        results = context.scatter_gather_all_cells(ctx, query_wrapper,
                                                   do_query)

    # If a limit was provided, it was passed to the per-cell query
    # routines. That means we have NUM_CELLS * limit items across
    # results. So, we need to consume from that limit below and
    # stop returning results. Call that total_limit since we will
    # modify it in the loop below, but do_query() above also looks
    # at the original provided limit.
    total_limit = limit or 0

    # Generate results from heapq so we can return the inner
    # instance instead of the wrapper. This is basically free
    # as it works as our caller iterates the results.
    feeder = heapq.merge(*results.values())
    while True:
        try:
            item = next(feeder)
        except StopIteration:
            return

        if context.is_cell_failure_sentinel(item._db_record):
            if not CONF.api.list_records_by_skipping_down_cells:
                # Fail the whole listing rather than silently omitting
                # results from an unreachable cell.
                raise exception.NovaException(
                    _('Cell %s is not responding but configuration '
                      'indicates that we should fail.') % item.cell_uuid)
            LOG.warning('Cell %s is not responding and hence is '
                        'being omitted from the results',
                        item.cell_uuid)
            if item._db_record == context.did_not_respond_sentinel:
                self._cells_timed_out.add(item.cell_uuid)
            elif isinstance(item._db_record, Exception):
                self._cells_failed.add(item.cell_uuid)
            # We might have received one batch but timed out or failed
            # on a later one, so be sure we fix the accounting.
            if item.cell_uuid in self._cells_responded:
                self._cells_responded.remove(item.cell_uuid)
            continue

        yield item._db_record
        self._cells_responded.add(item.cell_uuid)
        total_limit -= 1
        if total_limit == 0:
            # We'll only hit this if limit was nonzero and we just
            # generated our last one
            return
def get_records_sorted(self, ctx, filters, limit, marker, **kwargs):
    """Get a cross-cell list of records matching filters.

    This iterates cells in parallel generating a unified and sorted list
    of records as efficiently as possible. It takes care to iterate the
    list as infrequently as possible. We wrap the results in RecordWrapper
    objects so that they are sortable by heapq.merge(), which requires
    that the '<' operator just works.

    Our sorting requirements are encapsulated into the RecordSortContext
    provided to the constructor for this object.

    This function is a generator of records from the database like what
    you would get from instance_get_all_by_filters_sort() in the DB API.

    NOTE: Since we do these in parallel, a nonzero limit will be passed
    to each database query, although the limit will be enforced in the
    output of this function. Meaning, we will still query $limit from
    each database, but only return $limit total results.

    :param ctx: request context
    :param filters: query filters passed through to get_by_filters()
    :param limit: maximum total number of records to yield, or None for
        unlimited
    :param marker: identifier of the last record of the previous page,
        or None for the first page
    """
    if marker:
        # A marker identifier was provided from the API. Call this
        # the 'global' marker as it determines where we start the
        # process across all cells. Look up the record in
        # whatever cell it is in and record the values for the
        # sort keys so we can find the marker instance in each
        # cell (called the 'local' marker).
        global_marker_record = self.get_marker_record(ctx, marker)
        global_marker_values = [
            global_marker_record[key] for key in self.sort_ctx.sort_keys
        ]

    def do_query(ctx):
        """Generate RecordWrapper(record) objects from a cell.

        We do this inside the thread (created by
        scatter_gather_all_cells()) so that we return wrappers and
        avoid having to iterate the combined result list in the
        caller again. This is run against each cell by the
        scatter_gather routine.
        """

        # The local marker is an identifier of a record in a cell
        # that is found by the special method
        # get_marker_by_values(). It should be the next record
        # in order according to the sort provided, but after the
        # marker instance which may have been in another cell.
        local_marker = None

        # Since the regular DB query routines take a marker and assume that
        # the marked record was the last entry of the previous page, we
        # may need to prefix it to our result query if we're not the cell
        # that had the actual marker record.
        local_marker_prefix = []

        marker_id = self.marker_identifier

        if marker:
            # FIXME(danms): If we knew which cell we were in here, we could
            # avoid looking up the marker again. But, we don't currently.
            local_marker = self.get_marker_by_values(
                ctx, global_marker_values)
            if local_marker:
                if local_marker != marker:
                    # We did find a marker in our cell, but it wasn't
                    # the global marker. Thus, we will use it as our
                    # marker in the main query below, but we also need
                    # to prefix that result with this marker instance
                    # since the result below will not return it and it
                    # has not been returned to the user yet. Note that
                    # we do _not_ prefix the marker instance if our
                    # marker was the global one since that has already
                    # been sent to the user.
                    local_marker_filters = copy.copy(filters)
                    if marker_id not in local_marker_filters:
                        # If an $id filter was provided, it will
                        # have included our marker already if this
                        # instance is desired in the output
                        # set. If it wasn't, we specifically query
                        # for it. If the other filters would have
                        # excluded it, then we'll get an empty set
                        # here and not include it in the output as
                        # expected.
                        local_marker_filters[marker_id] = [local_marker]
                    local_marker_prefix = self.get_by_filters(
                        ctx, local_marker_filters, limit=1, marker=None,
                        **kwargs)
            else:
                # There was a global marker but everything in our
                # cell is _before_ that marker, so we return
                # nothing. If we didn't have this clause, we'd
                # pass marker=None to the query below and return a
                # full unpaginated set for our cell.
                return []

        main_query_result = self.get_by_filters(
            ctx, filters,
            limit=limit, marker=local_marker,
            **kwargs)

        return (RecordWrapper(self.sort_ctx, inst)
                for inst in itertools.chain(local_marker_prefix,
                                            main_query_result))

    # NOTE(tssurya): When the below routine provides sentinels to indicate
    # a timeout on a cell, we ignore that cell to avoid the crash when
    # doing the merge below and continue merging the results from the 'up'
    # cells.
    # TODO(tssurya): Modify this to return the minimal available info from
    # the down cells.
    if self.cells:
        results = context.scatter_gather_cells(ctx, self.cells, 60,
                                               do_query)
    else:
        results = context.scatter_gather_all_cells(ctx, do_query)

    # Drop cells that timed out or raised so heapq.merge() below only
    # sees iterables of RecordWrapper objects.
    for cell_uuid in list(results):
        if results[cell_uuid] in (context.did_not_respond_sentinel,
                                  context.raised_exception_sentinel):
            LOG.warning("Cell %s is not responding and hence skipped "
                        "from the results.", cell_uuid)
            results.pop(cell_uuid)

    # If a limit was provided, it was passed to the per-cell query
    # routines. That means we have NUM_CELLS * limit items across
    # results. So, we need to consume from that limit below and
    # stop returning results.
    limit = limit or 0

    # Generate results from heapq so we can return the inner
    # instance instead of the wrapper. This is basically free
    # as it works as our caller iterates the results.
    for i in heapq.merge(*results.values()):
        yield i._db_record
        limit -= 1
        if limit == 0:
            # We'll only hit this if limit was nonzero and we just
            # generated our last one
            return