def test_with_distinct_counts_raises_when_queryset_includes_unsupported_options(
            self):
        """
        Verify that an error is raised if the original queryset includes options that are not supported by our
        custom Search class.
        """
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            DSLFacetedSearch())
        with pytest.raises(RuntimeError) as err:
            facet_field = 'start'
            agg_filter = ESDSLQ('match_all')
            agg = DateHistogramFacet(field=facet_field, interval='month')
            dc_queryset.aggs.bucket('_filter_' + facet_field,
                                    'filter',
                                    filter=agg_filter).bucket(
                                        facet_field, agg.get_aggregation())
            dc_queryset.with_distinct_counts('aggregation_key')
        assert str(
            err.value
        ) == 'DistinctCountsSearchQuerySet does not support date facets.'

        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            DSLFacetedSearch())
        with pytest.raises(RuntimeError) as err:
            facet_field = 'pacing_type'
            agg_filter = ESDSLQ('match_all')
            agg = TermsFacet(field=facet_field, order='term')
            dc_queryset.aggs.bucket('_filter_' + facet_field,
                                    'filter',
                                    filter=agg_filter).bucket(
                                        facet_field, agg.get_aggregation())
            dc_queryset.with_distinct_counts('aggregation_key')
        assert 'DistinctCountsSearchQuerySet only supports a limited set of field facet options.' in str(
            err.value)
    def test_run_executes_the_query_and_caches_the_results(self):
        """ Verify that run executes the query and caches the results."""
        course_1 = CourseFactory()
        run_1 = CourseRunFactory(title='foo',
                                 pacing_type='self_paced',
                                 hidden=True,
                                 course=course_1)
        run_2 = CourseRunFactory(title='foo',
                                 pacing_type='self_paced',
                                 hidden=True,
                                 course=course_1)

        course_2 = CourseFactory()
        run_3 = CourseRunFactory(title='foo',
                                 pacing_type='instructor_paced',
                                 hidden=False,
                                 course=course_2)
        CourseRunFactory(title='bar',
                         pacing_type='instructor_paced',
                         hidden=False,
                         course=course_2)

        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        queryset.aggregation_key = 'aggregation_key'
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())

        queryset.aggs.bucket('_query_{0}'.format('hidden'),
                             'filter',
                             filter=ESDSLQ('bool',
                                           filter=ESDSLQ('term', hidden=True)))

        assert queryset._distinct_result_count is None
        assert not hasattr(self, '_response')

        queryset.execute()
        expected_results = sorted([run_1.key, run_2.key, run_3.key])
        actual_results = sorted([run.key for run in queryset._response.hits])
        assert queryset._distinct_result_count == 2
        assert queryset._response.hits.total['value'] == 3
        assert expected_results == actual_results
        facet_counts = queryset._response.facets
        for field_val, count, distinct_count in facet_counts['fields'][
                'pacing_type']:
            assert field_val in {'self_paced', 'instructor_paced'}
            if field_val == 'self_paced':
                assert count == 2 and distinct_count == 1
            elif field_val == 'instructor_paced':
                assert count == 1 and distinct_count == 1

        count, distinct_count = facet_counts['queries']['hidden']
        assert count == 2 and distinct_count == 1
    def test_get_distinct_count_runs_query_when_cache_empty(self):
        """ Verify that distinct_count runs the query and caches/returns the distinct_count."""
        course = CourseFactory()
        CourseRunFactory(title='foo', course=course)
        CourseRunFactory(title='foo', course=course)
        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        queryset.aggregation_key = 'aggregation_key'

        assert queryset._distinct_result_count is None
        assert queryset.distinct_count() == 1
        assert queryset._distinct_result_count == 1
    def test_search_raises_when_called_with_date_facet(self):
        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name)
        facet_field = 'start'
        agg_filter = ESDSLQ('match_all')
        agg = DateHistogramFacet(field=facet_field, interval='month')
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        backend = DistinctCountsElasticsearchQueryWrapper(
            queryset, 'aggregation_key')

        querystring = queryset.to_dict()
        with pytest.raises(RuntimeError) as err:
            backend.search(querystring)
        assert 'does not support date facets' in str(err.value)
    def test_distinct_count_returns_cached_distinct_count(self):
        """ Verify that distinct_count returns the cached distinct_result_count when present."""
        queryset = DSLFacetedSearch()
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            queryset).with_distinct_counts('aggregation_key')

        dc_queryset._distinct_result_count = 123  # pylint: disable=protected-access
        assert dc_queryset.distinct_count() == 123
 def test_run_raises_when_validation_fails(self):
     """ Verify that run raises an exception when the Search is misconfigured. """
     with mock.patch.object(DistinctCountsSearchQuerySet,
                            'validate') as mock_validate:
         mock_validate.side_effect = RuntimeError('validation failed')
         with pytest.raises(RuntimeError) as err:
             DistinctCountsSearchQuerySet().execute()
         assert str(err.value) == 'validation failed'
    def test_facet_counts_caches_results(self):
        """ Verify that facet_counts cache results when it is forced to run the query."""
        course = CourseFactory()
        runs = [
            CourseRunFactory(title='foo',
                             pacing_type='self_paced',
                             hidden=True,
                             course=course),
            CourseRunFactory(title='foo',
                             pacing_type='self_paced',
                             hidden=True,
                             course=course),
            CourseRunFactory(title='foo',
                             pacing_type='instructor_paced',
                             hidden=False,
                             course=course),
        ]

        queryset = DSLFacetedSearch(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        queryset.aggs.bucket('_query_{0}'.format('hidden'),
                             'filter',
                             filter=ESDSLQ('bool',
                                           filter=ESDSLQ('term', hidden=True)))
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            queryset).with_distinct_counts('aggregation_key')
        # This should force the query to execute, and the results to be cached
        facet_counts = dc_queryset.facet_counts()

        with mock.patch.object(DistinctCountsElasticsearchQueryWrapper,
                               'search') as mock_search:
            # Calling facet_counts again shouldn't result in an additional query
            cached_facet_counts = dc_queryset.facet_counts()
            assert not mock_search.called
            assert facet_counts == cached_facet_counts

            # Calling count shouldn't result in another query, as we should have already cached it with the
            # first request.
            count = dc_queryset.count()
            assert not mock_search.called
            assert count == len(runs)

            # Fetching the results shouldn't result in another query, as we should have already cached them
            # with the initial request.
            results = dc_queryset.execute()
            assert not mock_search.called
            expected = {run.key for run in runs}
            actual = {run.key for run in results}
            assert expected == actual
    def test_search_raises_when_called_with_unsupported_field_facet_option(
            self):
        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name)
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field, order='term')
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        backend = DistinctCountsElasticsearchQueryWrapper(
            queryset, 'aggregation_key')

        querystring = queryset.to_dict()
        with pytest.raises(RuntimeError) as err:
            backend.search(querystring)
        assert 'only supports a limited set of field facet options' in str(
            err.value)
 def test_with_distinct_counts(self):
     """
     Verify that the query object is converted to a DistinctCountsSearchQuerySet and the aggregation_key is
     configured properly.
     """
     queryset = DSLFacetedSearch()
     dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
         queryset).with_distinct_counts('aggregation_key')
     assert isinstance(dc_queryset, DistinctCountsSearchQuerySet)
     assert dc_queryset.aggregation_key == 'aggregation_key'
    def test_distinct_count_raises_when_not_properly_configured(self):
        """
        Verify that distinct_count raises when called without configuring the DSLFacetedSearch to compute distinct
        counts.
        """
        queryset = DSLFacetedSearch()
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)

        with pytest.raises(AttributeError) as err:
            dc_queryset.distinct_count()
        assert "object has no attribute 'distinct_count'" in str(err.value)
    def test_facet_counts_includes_distinct_counts(self):
        """ Verify that facet_counts include distinct counts. """
        course = CourseFactory()
        CourseRunFactory(title='foo',
                         pacing_type='self_paced',
                         hidden=True,
                         course=course)
        CourseRunFactory(title='foo',
                         pacing_type='self_paced',
                         hidden=True,
                         course=course)
        CourseRunFactory(title='foo',
                         pacing_type='instructor_paced',
                         hidden=False,
                         course=course)

        # Make sure to add both a field facet and a query facet so that we can be sure that both work.
        queryset = DSLFacetedSearch(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        queryset.aggs.bucket('_query_{0}'.format('hidden'),
                             'filter',
                             filter=ESDSLQ('bool',
                                           filter=ESDSLQ('term', hidden=True)))
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            queryset).with_distinct_counts('aggregation_key')
        facet_counts = dc_queryset.facet_counts()

        # Field facets are expected to be formatted as a list of three-tuples (field_value, count, distinct_count)
        for val, count, distinct_count in facet_counts['fields'][
                'pacing_type']:
            assert val in {'self_paced', 'instructor_paced'}
            if val == 'self_paced':
                assert count == 2
                assert distinct_count == 1
            elif val == 'instructor_paced':
                assert count == 1
                assert distinct_count == 1

        # Query facets are expected to be formatted as a dictionary mapping facet_names to two-tuples (count,
        # distinct_count)
        hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
        assert hidden_count == 2
        assert hidden_distinct_count == 1
    def test_build_search_kwargs_does_not_include_facet_clause(self):
        """ Verify that a facets clause is not included with search kwargs."""
        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name)
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())

        queryset.aggs.bucket('_query_{0}'.format('hidden'),
                             'filter',
                             filter=ESDSLQ('bool',
                                           filter=ESDSLQ('term', hidden=True)))
        querystring_params = queryset.to_dict()
        backend = DistinctCountsElasticsearchQueryWrapper(
            queryset, 'aggregation_key')
        search_kwargs = backend._build_search_kwargs(**querystring_params)

        assert 'facets' not in search_kwargs
        assert 'aggs' in search_kwargs
    def test_from_queryset(self):
        """ Verify that a DistinctCountsSearchQuerySet can be built from an existing SearchQuerySet."""
        course_1 = CourseFactory()
        CourseRunFactory(title='foo', course=course_1)
        CourseRunFactory(title='foo', course=course_1)

        course_2 = CourseFactory()
        CourseRunFactory(title='foo', course=course_2)
        CourseRunFactory(title='bar', course=course_2)
        queryset = DSLFacetedSearch(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)

        expected = sorted([run.key for run in queryset])
        actual = sorted([run.key for run in dc_queryset])
        assert expected == actual
    def test_distinct_count_runs_query_when_cache_is_empty(self):
        """ Verify that distinct_count runs the query, caches, and returns the distinct_count when cache is empty."""
        course_1 = CourseFactory()
        CourseRunFactory(title='foo', course=course_1)
        CourseRunFactory(title='foo', course=course_1)

        course_2 = CourseFactory()
        CourseRunFactory(title='foo', course=course_2)
        CourseRunFactory(title='bar', course=course_2)

        queryset = DSLFacetedSearch(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(
            queryset).with_distinct_counts('aggregation_key')

        assert dc_queryset._distinct_result_count is None  # pylint: disable=protected-access
        assert dc_queryset.distinct_count() == 2
        assert dc_queryset._distinct_result_count == 2  # pylint: disable=protected-access
    def test_run_handles_pagination(self):
        """ Verify that run supports paginated queries. """
        course_1 = CourseFactory()
        for _ in range(5):
            CourseRunFactory(title='foo', course=course_1)

        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name).filter('term', title='foo')
        queryset.aggregation_key = 'aggregation_key'

        queryset.execute()
        all_results = queryset._response.hits
        assert len(all_results) == 5
        del queryset._response
        queryset = queryset.query().extra(from_=1, size=2)
        queryset.execute()
        paginated_results = queryset._response.hits
        assert len(paginated_results) == 2

        expected = sorted([run.key for run in all_results[1:3]])
        actual = sorted([run.key for run in paginated_results])
        assert expected == actual
    def test_clone(self):
        """ Verify that clone copies all fields, including the aggregation_key and distinct_hit_count."""
        queryset = DistinctCountsSearchQuerySet()
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())

        queryset.aggregation_key = 'aggregation_key'
        queryset._distinct_result_count = 123

        clone = queryset._clone()
        assert queryset.to_dict() == clone.to_dict()
        assert queryset.aggregation_key == clone.aggregation_key
        assert queryset._distinct_result_count == clone._distinct_result_count
    def test_clone_with_different_class(self):
        """ Verify that clone does not copy aggregation_key and distinct_result_count when using different class."""
        queryset = DistinctCountsSearchQuerySet()
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field)
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        queryset.aggregation_key = 'aggregation_key'
        queryset._distinct_result_count = 123

        clone = queryset._clone(klass=Search)
        assert isinstance(clone, Search)
        assert queryset.to_dict() == clone.to_dict()
        assert not hasattr(clone, 'aggregation_key')
        assert not hasattr(clone, '_distinct_result_count')
    def test_validate_raises_when_configured_with_facet_with_unsupported_options(
            self):
        """ Verify that validate raises when Query configured with facet with unsupported options."""
        # The add date facet action on DistinctCountsSearchQuerySet raises when unsupported options are passed,
        # so configure a field facet with those options on a normal FacetedSearch or Search and then clone
        # it to a DistinctCountsSearchQuery.
        queryset = DistinctCountsSearchQuerySet(
            index=CourseRunDocument._index._name)
        facet_field = 'pacing_type'
        agg_filter = ESDSLQ('match_all')
        agg = TermsFacet(field=facet_field, order='term')
        queryset.aggs.bucket('_filter_' + facet_field,
                             'filter',
                             filter=agg_filter).bucket(facet_field,
                                                       agg.get_aggregation())
        queryset = queryset._clone(klass=DistinctCountsSearchQuerySet)
        queryset.aggregation_key = 'aggregation_key'

        with pytest.raises(RuntimeError) as err:
            queryset.validate()
        assert 'only supports a limited set of field facet options' in str(
            err.value)
Example #19
0
 def get_queryset(self, *args, **kwargs):
     """ Return the base Queryset to use to build up the search query."""
     queryset = super().get_queryset(*args, **kwargs)
     return DistinctCountsSearchQuerySet.from_queryset(
         queryset).with_distinct_counts('aggregation_key')
 def test_get_distinct_count_returns_cached_value(self):
     """ Verify that the distinct_count from the cache when present."""
     query = DistinctCountsSearchQuerySet()
     query._distinct_result_count = 123
     assert query.distinct_count() == 123
 def test_update_from_dict_search_raises(self):
     """ Verify that update_from_dict raises an exception."""
     with pytest.raises(RuntimeError) as err:
         DistinctCountsSearchQuerySet().update_from_dict()
     assert 'does not support raw queries' in str(err.value)
 def test_validate_raises_when_configured_without_aggregation_key(self):
     """ Verify that validate raises when DistinctCountsSearchQuerySet configured without aggregation_key."""
     with pytest.raises(RuntimeError) as err:
         DistinctCountsSearchQuerySet().execute()
     assert str(err.value) == 'aggregation_key is required.'