def test_with_distinct_counts_raises_when_queryset_includes_unsupported_options(
            self):
        """
        Verify that with_distinct_counts raises a RuntimeError when the queryset carries unsupported options.

        Two cases are exercised, each against a fresh queryset: a date histogram facet, and a terms facet
        constructed with ``order='term'``.
        """
        # Case 1: a date histogram facet.
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            DSLFacetedSearch())
        facet_field = 'start'
        agg = DateHistogramFacet(field=facet_field, interval='month')
        dc_queryset.aggs.bucket(
            '_filter_' + facet_field, 'filter', filter=ESDSLQ('match_all'),
        ).bucket(facet_field, agg.get_aggregation())
        # Only the call under test lives inside pytest.raises, so a RuntimeError raised while building
        # the aggregation cannot satisfy the expectation by accident.
        with pytest.raises(RuntimeError) as err:
            dc_queryset.with_distinct_counts('aggregation_key')
        assert str(
            err.value
        ) == 'DistinctCountsSearchQuerySet does not support date facets.'

        # Case 2: a terms facet that passes an extra option.
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            DSLFacetedSearch())
        facet_field = 'pacing_type'
        agg = TermsFacet(field=facet_field, order='term')
        dc_queryset.aggs.bucket(
            '_filter_' + facet_field, 'filter', filter=ESDSLQ('match_all'),
        ).bucket(facet_field, agg.get_aggregation())
        with pytest.raises(RuntimeError) as err:
            dc_queryset.with_distinct_counts('aggregation_key')
        assert 'DistinctCountsSearchQuerySet only supports a limited set of field facet options.' in str(
            err.value)
    def test_distinct_count_returns_cached_distinct_count(self):
        """ Verify that distinct_count hands back a pre-populated _distinct_result_count value."""
        cached_value = 123
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
        dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')

        # Seed the private cache attribute directly; distinct_count is expected to return it as-is.
        dc_queryset._distinct_result_count = cached_value  # pylint: disable=protected-access
        assert dc_queryset.distinct_count() == cached_value
    def test_facet_counts_caches_results(self):
        """ Verify that the first facet_counts call caches the facets, the count and the results."""
        course = CourseFactory()
        # (pacing_type, hidden) for each course run, created in this order.
        run_options = (
            ('self_paced', True),
            ('self_paced', True),
            ('instructor_paced', False),
        )
        runs = [
            CourseRunFactory(title='foo', pacing_type=pacing_type, hidden=hidden, course=course)
            for pacing_type, hidden in run_options
        ]

        queryset = DSLFacetedSearch(index=CourseRunDocument._index._name)
        queryset = queryset.filter('term', title='foo')

        # Field facet on pacing_type, nested under a match_all filter bucket.
        facet_field = 'pacing_type'
        terms_aggregation = TermsFacet(field=facet_field).get_aggregation()
        filter_bucket = queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=ESDSLQ('match_all'))
        filter_bucket.bucket(facet_field, terms_aggregation)

        # Query facet matching hidden course runs.
        hidden_filter = ESDSLQ('bool', filter=ESDSLQ('term', hidden=True))
        queryset.aggs.bucket('_query_{0}'.format('hidden'), 'filter', filter=hidden_filter)

        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)
        dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')

        # The first call has to run the query; everything below relies on what it cached.
        initial_facet_counts = dc_queryset.facet_counts()

        with mock.patch.object(DistinctCountsElasticsearchQueryWrapper, 'search') as mock_search:
            # A second facet_counts call must be answered without searching again.
            repeated_facet_counts = dc_queryset.facet_counts()
            assert not mock_search.called
            assert initial_facet_counts == repeated_facet_counts

            # The total count should already have been cached by the first request.
            total = dc_queryset.count()
            assert not mock_search.called
            assert total == len(runs)

            # The result documents should already have been cached by the first request as well.
            documents = dc_queryset.execute()
            assert not mock_search.called
            expected_keys = {run.key for run in runs}
            actual_keys = {document.key for document in documents}
            assert expected_keys == actual_keys
 def test_with_distinct_counts(self):
     """
     Verify that with_distinct_counts yields a DistinctCountsSearchQuerySet whose aggregation_key matches
     the key that was passed in.
     """
     aggregation_key = 'aggregation_key'
     base_search = DSLFacetedSearch()
     dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_search)
     dc_queryset = dc_queryset.with_distinct_counts(aggregation_key)

     assert isinstance(dc_queryset, DistinctCountsSearchQuerySet)
     assert dc_queryset.aggregation_key == aggregation_key
    def test_distinct_count_raises_when_not_properly_configured(self):
        """
        Verify that calling distinct_count on a queryset that never went through with_distinct_counts
        raises an AttributeError.
        """
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())

        with pytest.raises(AttributeError) as err:
            dc_queryset.distinct_count()

        message = str(err.value)
        assert "object has no attribute 'distinct_count'" in message
    def test_facet_counts_includes_distinct_counts(self):
        """
        Verify that facet_counts reports both the plain count and the distinct count for every facet.

        Three runs of the same course are created: two self-paced and hidden, one instructor-paced and
        not hidden. Every facet bucket is expected to report a distinct count of 1.
        """
        course = CourseFactory()
        CourseRunFactory(title='foo',
                         pacing_type='self_paced',
                         hidden=True,
                         course=course)
        CourseRunFactory(title='foo',
                         pacing_type='self_paced',
                         hidden=True,
                         course=course)
        CourseRunFactory(title='foo',
                         pacing_type='instructor_paced',
                         hidden=False,
                         course=course)

        # Make sure to add both a field facet and a query facet so that we can be sure that both work.
        queryset = DSLFacetedSearch(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        queryset.aggs.bucket('_query_{0}'.format('hidden'),
                             'filter',
                             filter=ESDSLQ('bool',
                                           filter=ESDSLQ('term', hidden=True)))
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            queryset).with_distinct_counts('aggregation_key')
        facet_counts = dc_queryset.facet_counts()

        # Field facets are expected to be formatted as a list of three-tuples (field_value, count, distinct_count).
        # Comparing against the complete expected mapping (instead of asserting inside a loop) makes the test
        # fail when a bucket is missing or the facet list is empty, rather than passing vacuously.
        pacing_type_counts = {
            val: (count, distinct_count)
            for val, count, distinct_count in facet_counts['fields']['pacing_type']
        }
        assert pacing_type_counts == {
            'self_paced': (2, 1),
            'instructor_paced': (1, 1),
        }

        # Query facets are expected to be formatted as a dictionary mapping facet_names to two-tuples (count,
        # distinct_count)
        hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
        assert hidden_count == 2
        assert hidden_distinct_count == 1
    def test_from_queryset(self):
        """ Verify that a queryset built via from_queryset yields the same course runs as its source."""
        first_course = CourseFactory()
        CourseRunFactory(title='foo', course=first_course)
        CourseRunFactory(title='foo', course=first_course)

        second_course = CourseFactory()
        CourseRunFactory(title='foo', course=second_course)
        CourseRunFactory(title='bar', course=second_course)

        source_queryset = DSLFacetedSearch(index=CourseRunDocument._index._name)
        source_queryset = source_queryset.filter('term', title='foo')
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(source_queryset)

        # Compare sorted keys so the check does not depend on result ordering.
        expected_keys = sorted(run.key for run in source_queryset)
        actual_keys = sorted(run.key for run in dc_queryset)
        assert expected_keys == actual_keys
    def test_distinct_count_runs_query_when_cache_is_empty(self):
        """ Verify that distinct_count computes its value and stores it when nothing has been cached yet."""
        first_course = CourseFactory()
        CourseRunFactory(title='foo', course=first_course)
        CourseRunFactory(title='foo', course=first_course)

        second_course = CourseFactory()
        CourseRunFactory(title='foo', course=second_course)
        CourseRunFactory(title='bar', course=second_course)

        queryset = DSLFacetedSearch(index=CourseRunDocument._index._name)
        queryset = queryset.filter('term', title='foo')
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)
        dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')

        expected_distinct_count = 2

        # Nothing is cached before the first call...
        assert dc_queryset._distinct_result_count is None  # pylint: disable=protected-access
        assert dc_queryset.distinct_count() == expected_distinct_count
        # ...and the computed value is stored on the queryset afterwards.
        assert dc_queryset._distinct_result_count == expected_distinct_count  # pylint: disable=protected-access
Esempio n. 9
0
 def get_queryset(self, *args, **kwargs):
     """ Wrap the parent's queryset so the search query is built with distinct counts enabled."""
     base_queryset = super().get_queryset(*args, **kwargs)
     dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_queryset)
     return dc_queryset.with_distinct_counts('aggregation_key')