def test_with_distinct_counts_raises_when_queryset_includes_unsupported_options(self):
    """
    Verify that with_distinct_counts raises a RuntimeError when the queryset it is called on
    carries facet options that DistinctCountsSearchQuerySet rejects.

    Two configurations are exercised:
      * a DateHistogramFacet on ``start``;
      * a TermsFacet on ``pacing_type`` created with ``order='term'``.
    """
    # Case 1: date histogram facet.
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    facet_field = 'start'
    agg_filter = ESDSLQ('match_all')
    agg = DateHistogramFacet(field=facet_field, interval='month')
    dc_queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=agg_filter).bucket(
        facet_field, agg.get_aggregation()
    )
    # Only the call under test lives inside pytest.raises, so a RuntimeError raised while
    # building the aggregations cannot be mistaken for the expected failure.
    with pytest.raises(RuntimeError) as err:
        dc_queryset.with_distinct_counts('aggregation_key')
    assert str(err.value) == 'DistinctCountsSearchQuerySet does not support date facets.'

    # Case 2: terms facet created with order='term'.
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field, order='term')
    dc_queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=agg_filter).bucket(
        facet_field, agg.get_aggregation()
    )
    with pytest.raises(RuntimeError) as err:
        dc_queryset.with_distinct_counts('aggregation_key')
    assert 'DistinctCountsSearchQuerySet only supports a limited set of field facet options.' in str(
        err.value
    )
def test_distinct_count_returns_cached_distinct_count(self):
    """ Verify that distinct_count hands back _distinct_result_count once that attribute has been set. """
    cached_count = 123
    base_search = DSLFacetedSearch()
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_search)
    dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')

    # Seed the cache attribute directly, then read it back through the public method.
    dc_queryset._distinct_result_count = cached_count  # pylint: disable=protected-access

    assert dc_queryset.distinct_count() == cached_count
def test_facet_counts_caches_results(self):
    """
    Verify that facet_counts caches what it fetched.

    After the first facet_counts call, repeating facet_counts, count and execute must not
    invoke DistinctCountsElasticsearchQueryWrapper.search again.
    """
    parent_course = CourseFactory()
    # (pacing_type, hidden) for each course run, created in this order.
    run_specs = (
        ('self_paced', True),
        ('self_paced', True),
        ('instructor_paced', False),
    )
    course_runs = [
        CourseRunFactory(title='foo', pacing_type=pacing, hidden=is_hidden, course=parent_course)
        for pacing, is_hidden in run_specs
    ]

    search = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')

    # Field facet on pacing_type.
    field_name = 'pacing_type'
    match_all = ESDSLQ('match_all')
    terms_facet = TermsFacet(field=field_name)
    filter_bucket = search.aggs.bucket('_filter_' + field_name, 'filter', filter=match_all)
    filter_bucket.bucket(field_name, terms_facet.get_aggregation())

    # Query facet on hidden=True.
    hidden_filter = ESDSLQ('bool', filter=ESDSLQ('term', hidden=True))
    search.aggs.bucket('_query_{0}'.format('hidden'), 'filter', filter=hidden_filter)

    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(search).with_distinct_counts('aggregation_key')

    # First call: this is the one expected to run the query and populate the cache.
    first_facet_counts = dc_queryset.facet_counts()

    with mock.patch.object(DistinctCountsElasticsearchQueryWrapper, 'search') as mock_search:
        # A second facet_counts call must be served without another query.
        second_facet_counts = dc_queryset.facet_counts()
        assert not mock_search.called
        assert first_facet_counts == second_facet_counts

        # count must likewise come from the data gathered by the first request.
        total = dc_queryset.count()
        assert not mock_search.called
        assert total == len(course_runs)

        # And so must the results themselves.
        response = dc_queryset.execute()
        assert not mock_search.called

        expected_keys = {run.key for run in course_runs}
        actual_keys = {hit.key for hit in response}
        assert expected_keys == actual_keys
def test_with_distinct_counts(self):
    """
    Verify that from_queryset(...).with_distinct_counts(key) yields a DistinctCountsSearchQuerySet
    whose aggregation_key is the key that was passed in.
    """
    base_search = DSLFacetedSearch()
    converted = DistinctCountsSearchQuerySet.from_queryset(base_search)
    dc_queryset = converted.with_distinct_counts('aggregation_key')

    assert isinstance(dc_queryset, DistinctCountsSearchQuerySet)
    assert dc_queryset.aggregation_key == 'aggregation_key'
def test_distinct_count_raises_when_not_properly_configured(self):
    """
    Verify that calling distinct_count on a queryset built with from_queryset alone (no
    with_distinct_counts call) raises an AttributeError.
    """
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())

    with pytest.raises(AttributeError) as excinfo:
        dc_queryset.distinct_count()

    message = str(excinfo.value)
    assert "object has no attribute 'distinct_count'" in message
def test_facet_counts_includes_distinct_counts(self):
    """
    Verify that facet_counts include distinct counts.

    Three course runs titled 'foo' are created under a single course: two self-paced hidden
    runs and one instructor-paced visible run. Both a field facet (``pacing_type``) and a query
    facet (``hidden``) are registered so that both code paths are covered.
    """
    course = CourseFactory()
    CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course)
    CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course)
    CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course)

    # Make sure to add both a field facet and a query facet so that we can be sure that both work.
    queryset = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')
    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field)
    queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=agg_filter).bucket(
        facet_field, agg.get_aggregation()
    )
    queryset.aggs.bucket(
        '_query_{0}'.format('hidden'),
        'filter',
        filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)),
    )
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset).with_distinct_counts('aggregation_key')

    facet_counts = dc_queryset.facet_counts()

    # Field facets are expected to be formatted as a list of three-tuples (field_value, count, distinct_count).
    # Compare the complete sorted list rather than checking entries one by one: a per-entry loop
    # passes vacuously when the facet list is empty or when one of the expected buckets is missing.
    pacing_type_facets = sorted(
        (val, count, distinct_count)
        for val, count, distinct_count in facet_counts['fields']['pacing_type']
    )
    assert pacing_type_facets == [
        ('instructor_paced', 1, 1),
        ('self_paced', 2, 1),
    ]

    # Query facets are expected to be formatted as a dictionary mapping facet_names to two-tuples (count,
    # distinct_count)
    hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
    assert hidden_count == 2
    assert hidden_distinct_count == 1
def test_from_queryset(self):
    """
    Verify that a DistinctCountsSearchQuerySet built with from_queryset iterates over the same
    course run keys as the search it was built from.
    """
    first_course = CourseFactory()
    CourseRunFactory(title='foo', course=first_course)
    CourseRunFactory(title='foo', course=first_course)

    second_course = CourseFactory()
    CourseRunFactory(title='foo', course=second_course)
    CourseRunFactory(title='bar', course=second_course)

    base_search = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')
    converted = DistinctCountsSearchQuerySet.from_queryset(base_search)

    # Compare as sorted lists so the check does not depend on result ordering.
    expected_keys = sorted(run.key for run in base_search)
    actual_keys = sorted(run.key for run in converted)
    assert expected_keys == actual_keys
def test_distinct_count_runs_query_when_cache_is_empty(self):
    """
    Verify that distinct_count, called while _distinct_result_count is still None, returns the
    distinct count and leaves it stored in _distinct_result_count.
    """
    first_course = CourseFactory()
    CourseRunFactory(title='foo', course=first_course)
    CourseRunFactory(title='foo', course=first_course)

    second_course = CourseFactory()
    CourseRunFactory(title='foo', course=second_course)
    CourseRunFactory(title='bar', course=second_course)

    base_search = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_search).with_distinct_counts(
        'aggregation_key'
    )

    # Nothing cached before the first call.
    assert dc_queryset._distinct_result_count is None  # pylint: disable=protected-access

    # The three 'foo' runs were created under two courses, and the expected distinct count is 2.
    assert dc_queryset.distinct_count() == 2

    # The value is now held on the queryset.
    assert dc_queryset._distinct_result_count == 2  # pylint: disable=protected-access
def get_queryset(self, *args, **kwargs):
    """
    Return the base queryset used to build up the search query.

    The parent's queryset is converted with DistinctCountsSearchQuerySet.from_queryset and then
    configured through with_distinct_counts('aggregation_key').
    """
    parent_queryset = super().get_queryset(*args, **kwargs)
    distinct_queryset = DistinctCountsSearchQuerySet.from_queryset(parent_queryset)
    return distinct_queryset.with_distinct_counts('aggregation_key')