def test_with_distinct_counts_raises_when_queryset_includes_unsupported_options(self):
    """
    Verify that an error is raised if the original queryset includes options
    that are not supported by our custom Search class.
    """
    # Scenario 1: date facets are rejected outright.
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    with pytest.raises(RuntimeError) as err:
        field_name = 'start'
        date_facet = DateHistogramFacet(field=field_name, interval='month')
        filter_bucket = dc_queryset.aggs.bucket(
            '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
        filter_bucket.bucket(field_name, date_facet.get_aggregation())
        dc_queryset.with_distinct_counts('aggregation_key')
    assert str(err.value) == 'DistinctCountsSearchQuerySet does not support date facets.'

    # Scenario 2: field facets configured with unsupported options (e.g. order) are rejected.
    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    with pytest.raises(RuntimeError) as err:
        field_name = 'pacing_type'
        terms_facet = TermsFacet(field=field_name, order='term')
        filter_bucket = dc_queryset.aggs.bucket(
            '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
        filter_bucket.bucket(field_name, terms_facet.get_aggregation())
        dc_queryset.with_distinct_counts('aggregation_key')
    assert 'DistinctCountsSearchQuerySet only supports a limited set of field facet options.' in str(err.value)
def test_run_executes_the_query_and_caches_the_results(self):
    """ Verify that run executes the query and caches the results."""
    course_1 = CourseFactory()
    run_1 = CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course_1)
    run_2 = CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course_1)
    course_2 = CourseFactory()
    run_3 = CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course_2)
    # This run's title does not match the query and must be excluded from all results.
    CourseRunFactory(title='bar', pacing_type='instructor_paced', hidden=False, course=course_2)

    queryset = DistinctCountsSearchQuerySet(index=CourseRunDocument._index._name).filter('term', title='foo')
    queryset.aggregation_key = 'aggregation_key'

    # Configure one field facet and one query facet so both kinds are exercised.
    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field)
    queryset.aggs.bucket('_filter_' + facet_field, 'filter',
                         filter=agg_filter).bucket(facet_field, agg.get_aggregation())
    queryset.aggs.bucket('_query_{0}'.format('hidden'), 'filter',
                         filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)))

    # Nothing should be cached before execute() runs.
    assert queryset._distinct_result_count is None
    # FIX: check the queryset's response cache, not the test case. The original
    # asserted `not hasattr(self, '_response')`, which is vacuously true for the
    # TestCase instance and never verified the queryset's cache state.
    assert not hasattr(queryset, '_response')

    queryset.execute()

    # All three matching runs are returned, but they belong to only two courses.
    expected_results = sorted([run_1.key, run_2.key, run_3.key])
    actual_results = sorted([run.key for run in queryset._response.hits])
    assert queryset._distinct_result_count == 2
    assert queryset._response.hits.total['value'] == 3
    assert expected_results == actual_results

    # Field facets carry (value, count, distinct_count) triples.
    facet_counts = queryset._response.facets
    for field_val, count, distinct_count in facet_counts['fields']['pacing_type']:
        assert field_val in {'self_paced', 'instructor_paced'}
        if field_val == 'self_paced':
            assert count == 2 and distinct_count == 1
        elif field_val == 'instructor_paced':
            assert count == 1 and distinct_count == 1

    # Query facets carry (count, distinct_count) pairs.
    count, distinct_count = facet_counts['queries']['hidden']
    assert count == 2 and distinct_count == 1
def test_facet_counts_caches_results(self):
    """ Verify that facet_counts cache results when it is forced to run the query."""
    course = CourseFactory()
    runs = [
        CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
        CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
        CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course),
    ]

    base_search = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')
    field_name = 'pacing_type'
    pacing_facet = TermsFacet(field=field_name)
    filter_bucket = base_search.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, pacing_facet.get_aggregation())
    base_search.aggs.bucket('_query_{0}'.format('hidden'), 'filter',
                            filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)))

    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_search).with_distinct_counts('aggregation_key')

    # This should force the query to execute, and the results to be cached
    facet_counts = dc_queryset.facet_counts()

    with mock.patch.object(DistinctCountsElasticsearchQueryWrapper, 'search') as mock_search:
        # Calling facet_counts again shouldn't result in an additional query
        cached_facet_counts = dc_queryset.facet_counts()
        assert not mock_search.called
        assert facet_counts == cached_facet_counts

        # The count was cached along with the first request, so no new query here either.
        count = dc_queryset.count()
        assert not mock_search.called
        assert count == len(runs)

        # The hits themselves were also cached with the initial request.
        results = dc_queryset.execute()
        assert not mock_search.called
        assert {run.key for run in runs} == {run.key for run in results}
def test_facet_counts_includes_distinct_counts(self):
    """ Verify that facet_counts include distinct counts. """
    course = CourseFactory()
    CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course)
    CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course)
    CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course)

    # Configure both a field facet and a query facet so both paths are verified.
    base_search = DSLFacetedSearch(index=CourseRunDocument._index._name).filter('term', title='foo')
    field_name = 'pacing_type'
    pacing_facet = TermsFacet(field=field_name)
    filter_bucket = base_search.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, pacing_facet.get_aggregation())
    base_search.aggs.bucket('_query_{0}'.format('hidden'), 'filter',
                            filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)))

    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(base_search).with_distinct_counts('aggregation_key')
    facet_counts = dc_queryset.facet_counts()

    # Field facets are formatted as three-tuples: (field_value, count, distinct_count).
    for val, count, distinct_count in facet_counts['fields']['pacing_type']:
        assert val in {'self_paced', 'instructor_paced'}
        if val == 'self_paced':
            assert count == 2
            assert distinct_count == 1
        elif val == 'instructor_paced':
            assert count == 1
            assert distinct_count == 1

    # Query facets map facet names to two-tuples: (count, distinct_count).
    hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
    assert hidden_count == 2
    assert hidden_distinct_count == 1
def test_current_run_boosting(self, runadates, runbdates, pacing_type, boosted):
    """Verify that "current" CourseRuns are boosted.

    See the is_current_and_still_upgradeable CourseRun property to understand what this means.
    """
    start_a, end_a = runadates
    start_b, end_b = runbdates
    now = datetime.datetime.now(pytz.timezone('utc'))
    upgrade_deadline_tomorrow = now + relativedelta(days=1)
    with patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=upgrade_deadline_tomorrow):
        run_a = self.build_normalized_course_run(
            title='test1', start=start_a, end=end_a, pacing_type=pacing_type)
        run_b = self.build_normalized_course_run(
            title='test2', start=start_b, end=end_b, pacing_type=pacing_type)
        search_results = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()
        assert len(search_results) == 2
        # The "current" run must rank first with a strictly higher score.
        assert search_results[0].meta['score'] > search_results[1].meta['score']
        expected_winner = run_a if boosted == 'a' else run_b
        assert expected_winner.title == search_results[0].title
def contains(self, request):
    """ Determine if course runs are found in the query results.

    A dictionary mapping course run keys to booleans,
    indicating course run presence, will be returned.
    ---
    serializer: serializers.ContainedCourseRunsSerializer
    parameters:
        - name: query
          description: Elasticsearch querystring query
          required: true
          type: string
          paramType: query
          multiple: false
        - name: course_run_ids
          description: Comma-separated list of course run IDs
          required: true
          type: string
          paramType: query
          multiple: true
        - name: partner
          description: Filter by partner
          required: false
          type: string
          paramType: query
          multiple: false
    """
    query = request.GET.get('query')
    course_run_ids = request.GET.get('course_run_ids')
    partner = self.request.site.partner

    # Both the querystring query and the run-id list are required.
    if not (query and course_run_ids):
        return Response(status=status.HTTP_400_BAD_REQUEST)

    requested_ids = course_run_ids.split(',')
    partner_filter = ESDSLQ('term', partner=partner.short_code)
    key_filter = ESDSLQ('terms', **{'key.raw': requested_ids})
    matched = CourseRun.search(query).filter(partner_filter & key_filter).source(['key'])
    matched_keys = {hit.key for hit in matched}

    instance = {
        'course_runs': {run_id: run_id in matched_keys for run_id in requested_ids}
    }
    serializer = serializers.ContainedCourseRunsSerializer(instance)
    return Response(serializer.data)
def construct_query_filter_facets(self, request, view):
    """Build a bool-filter query for every enabled faceted query search field on the view."""
    faceted_fields = self.prepare_faceted_query_search_fields(view)
    return {
        name: ESDSLQ('bool', filter=options['query'])
        for name, options in faceted_fields.items()
        if options['enabled']
    }
def get(self, request):
    """ Determine if a set of courses and/or course runs is found in the query results.

    Returns dict: mapping of course and run identifiers included in the request to boolean values
    indicating whether or not the associated course or run is contained in the queryset
    described by the query found in the request.
    """
    query = request.GET.get('query')
    course_run_ids = request.GET.get('course_run_ids', None)
    course_uuids = request.GET.get('course_uuids', None)
    partner = self.request.site.partner
    # A query plus at least one identifier list is required.
    if query and (course_run_ids or course_uuids):
        identified_course_ids = set()  # identifiers actually found by the search
        specified_course_ids = []  # every identifier the caller asked about
        if course_run_ids:
            course_run_ids = course_run_ids.split(',')
            specified_course_ids = course_run_ids
            # NOTE(review): partner and key filters are combined with OR ('|') here,
            # while the course-run "contains" endpoint combines them with AND ('&') —
            # confirm the OR is intentional.
            identified_course_ids.update(
                i.key for i in CourseRun.search(query).filter(
                    ESDSLQ('term', partner=partner.short_code)
                    | ESDSLQ('terms', **{'key.raw': course_run_ids})).
                source(['key']))
        if course_uuids:
            course_uuids = [
                UUID(course_uuid) for course_uuid in course_uuids.split(',')
            ]
            specified_course_ids += course_uuids
            # NOTE(review): .filter(partner=..., uuid__in=...).values_list(...) is
            # Django-ORM-style usage; presumably Course.search returns a
            # queryset-like wrapper — verify this against the Course model/manager.
            identified_course_ids.update(
                Course.search(query).filter(
                    partner=partner,
                    uuid__in=course_uuids).values_list('uuid', flat=True))
        # Map each requested identifier (stringified) to its presence in the results.
        contains = {
            str(identifier): identifier in identified_course_ids
            for identifier in specified_course_ids
        }
        return Response(contains)
    return Response(
        'CatalogQueryContains endpoint requires query and identifiers list(s)',
        status=status.HTTP_400_BAD_REQUEST)
def _get_query_results(self):
    """ Returns the results of this Catalog's query.

    Returns:
        SearchQuerySet
    """
    return CourseDocument.search().query(
        ESDSLQ('query_string', query=self.query, analyze_wildcard=True))
def test_self_paced_boosting(self):
    """Verify that self paced courses are boosted over instructor led courses."""
    self.build_normalized_course_run(pacing_type='instructor_paced')
    boosted_run = self.build_normalized_course_run(pacing_type='self_paced')

    hits = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()

    assert len(hits) == 2
    first, second = hits[0], hits[1]
    # The self-paced run must rank first with a strictly higher score.
    assert first.meta['score'] > second.meta['score']
    assert boosted_run.pacing_type == first.pacing_type
def test_program_type_boosting(self, program_type):
    """Verify MicroMasters and Professional Certificate are boosted over XSeries."""
    ProgramFactory(type=ProgramType.objects.get(translations__name_t='XSeries'))
    boosted_program = ProgramFactory(
        type=ProgramType.objects.get(translations__name_t=program_type))

    hits = ProgramDocument.search().query(ESDSLQ('match_all')).execute()

    assert len(hits) == 2
    # The boosted program type must rank first with a strictly higher score.
    assert hits[0].meta['score'] > hits[1].meta['score']
    assert str(boosted_program.type) == str(hits[0].type)
def test_build_search_kwargs_does_not_include_facet_clause(self):
    """ Verify that a facets clause is not included with search kwargs."""
    queryset = DistinctCountsSearchQuerySet(index=CourseRunDocument._index._name)
    field_name = 'pacing_type'
    pacing_facet = TermsFacet(field=field_name)
    filter_bucket = queryset.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, pacing_facet.get_aggregation())
    queryset.aggs.bucket('_query_{0}'.format('hidden'), 'filter',
                         filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)))

    backend = DistinctCountsElasticsearchQueryWrapper(queryset, 'aggregation_key')
    search_kwargs = backend._build_search_kwargs(**queryset.to_dict())

    # Facet configuration must be translated into aggs, never a legacy facets clause.
    assert 'facets' not in search_kwargs
    assert 'aggs' in search_kwargs
def filter_by_facets(self, request, queryset, view):
    """Apply the request's selected query facets to the queryset as term filters."""
    selected_params = self.prepare_faceted_field_filter_params(request)
    available_facets = view.faceted_search_fields
    term_filters = []
    for field, value in selected_params:
        # Reject facets that are not declared on the view.
        if not available_facets.get(field):
            raise ParseError(
                'The selected query facet [{facet}] is not valid.'.format(facet=field))
        term_filters.append(ESDSLQ('term', **{field: value}))
    return queryset.query('bool', **{'filter': term_filters})
def test_start_date_boosting(self):
    """Verify upcoming courses are boosted over past courses."""
    now = datetime.datetime.now(pytz.timezone('utc'))
    # Control run: starts much later, should not win the boost.
    self.build_normalized_course_run(start=now + datetime.timedelta(weeks=10))
    soon_run = self.build_normalized_course_run(start=now + datetime.timedelta(weeks=1))

    hits = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()

    assert len(hits) == 2
    # The sooner-starting run must rank first with a strictly higher score.
    assert hits[0].meta['score'] > hits[1].meta['score']
    assert int(soon_run.start.timestamp()) == int(hits[0].start.timestamp())
class FacetQueryFieldsMixin:
    """ Facet query fields mixin.

    Provides faceted query filter fields.

    Query cases:
        - availability_current
        - availability_starting_soon
        - availability_upcoming
        - availability_archived
    """

    # Mapping of facet name -> config. 'query' is a list of ESDSLQ range clauses
    # (all must hold, e.g. current = started already AND not yet ended); 'enabled'
    # toggles whether the facet is exposed. Date math ("now", "now+60d") is
    # evaluated by Elasticsearch at query time.
    faceted_query_filter_fields = {
        'availability_current': {
            'query': [
                ESDSLQ('range', start={"lte": "now"}),
                ESDSLQ('range', end={"gte": "now"})
            ],
            'enabled': True,
        },
        'availability_starting_soon': {
            # Starts within the next 60 days.
            'query': [ESDSLQ('range', start={
                "lte": "now+60d",
                "gte": "now"
            })],
            'enabled': True,
        },
        'availability_upcoming': {
            # Starts more than 60 days from now.
            'query': [ESDSLQ('range', start={"gte": "now+60d"})],
            'enabled': True
        },
        'availability_archived': {
            # Already ended.
            'query': [ESDSLQ('range', end={"lte": "now"})],
            'enabled': True
        },
    }
def test_search_raises_when_called_with_date_facet(self):
    """Verify the wrapper refuses to run a search configured with a date facet."""
    queryset = DistinctCountsSearchQuerySet(index=CourseRunDocument._index._name)
    field_name = 'start'
    date_facet = DateHistogramFacet(field=field_name, interval='month')
    filter_bucket = queryset.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, date_facet.get_aggregation())

    backend = DistinctCountsElasticsearchQueryWrapper(queryset, 'aggregation_key')
    with pytest.raises(RuntimeError) as err:
        backend.search(queryset.to_dict())
    assert 'does not support date facets' in str(err.value)
def test_clone(self):
    """ Verify that clone copies all fields, including the aggregation_key and distinct_hit_count."""
    original = DistinctCountsSearchQuerySet()
    field_name = 'pacing_type'
    pacing_facet = TermsFacet(field=field_name)
    filter_bucket = original.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, pacing_facet.get_aggregation())
    original.aggregation_key = 'aggregation_key'
    original._distinct_result_count = 123

    copy = original._clone()

    # The clone must carry over the serialized query plus the custom attributes.
    assert original.to_dict() == copy.to_dict()
    assert original.aggregation_key == copy.aggregation_key
    assert original._distinct_result_count == copy._distinct_result_count
def test_clone_with_different_class(self):
    """ Verify that clone does not copy aggregation_key and distinct_result_count when using different class."""
    original = DistinctCountsSearchQuerySet()
    field_name = 'pacing_type'
    pacing_facet = TermsFacet(field=field_name)
    filter_bucket = original.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, pacing_facet.get_aggregation())
    original.aggregation_key = 'aggregation_key'
    original._distinct_result_count = 123

    copy = original._clone(klass=Search)

    # Cloning to a plain Search keeps the query but drops the custom attributes.
    assert isinstance(copy, Search)
    assert original.to_dict() == copy.to_dict()
    assert not hasattr(copy, 'aggregation_key')
    assert not hasattr(copy, '_distinct_result_count')
def test_search_raises_when_called_with_unsupported_field_facet_option(self):
    """Verify the wrapper refuses to run a search whose field facet uses unsupported options."""
    queryset = DistinctCountsSearchQuerySet(index=CourseRunDocument._index._name)
    field_name = 'pacing_type'
    # The 'order' option is not supported by the distinct-counts wrapper.
    terms_facet = TermsFacet(field=field_name, order='term')
    filter_bucket = queryset.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, terms_facet.get_aggregation())

    backend = DistinctCountsElasticsearchQueryWrapper(queryset, 'aggregation_key')
    with pytest.raises(RuntimeError) as err:
        backend.search(queryset.to_dict())
    assert 'only supports a limited set of field facet options' in str(err.value)
def test_validate_raises_when_configured_with_date_facet(self):
    """ Verify that validate raises when Query configured with date facet."""
    # Adding a date facet directly on a DistinctCountsSearchQuerySet raises, so the
    # facet is configured on a plain FacetedSearch first and then cloned across.
    base_search = FacetedSearch(index=CourseRunDocument._index._name)
    field_name = 'start'
    date_facet = DateHistogramFacet(field=field_name, interval='month')
    filter_bucket = base_search.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, date_facet.get_aggregation())

    dc_queryset = base_search._clone(klass=DistinctCountsSearchQuerySet)
    dc_queryset.aggregation_key = 'aggregation_key'

    with pytest.raises(RuntimeError) as err:
        dc_queryset.validate()
    assert 'does not support date facets' in str(err.value)
def test_expired_paid_seat_penalized(self):
    """
    Verify that a course run with an expired, paid seat is penalized relative
    to one with an enrollable, paid seat.
    """
    now = datetime.datetime.now(pytz.timezone('utc'))

    # Run with a paid seat whose enrollment window is still open -> boosted.
    future = now + datetime.timedelta(days=15)
    with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=True):
        with patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=future):
            promoted_run = self.build_normalized_course_run(title='promoted')

    # Run with a paid seat whose enrollment window has closed -> penalized.
    past = now - datetime.timedelta(days=15)
    with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=True):
        with patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=past):
            penalized_run = self.build_normalized_course_run(title='penalized')

    hits = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()

    assert len(hits) == 2
    assert [promoted_run.title, penalized_run.title] == [hit.title for hit in hits]
    assert hits[0].meta['score'] > hits[1].meta['score']
    # The promoted run receives a large additive boost, so its score is well above base.
    assert hits[0].meta['score'] > 20
    # The "penalty" is implemented as a boost applied to non-expired runs, so the
    # expired run simply keeps its base relevance score (~1.0).
    assert round(hits[1].meta['score']) == 1
def test_validate_raises_when_configured_with_facet_with_unsupported_options(self):
    """ Verify that validate raises when Query configured with facet with unsupported options."""
    # The unsupported facet options are attached via aggs.bucket (which does not
    # validate), then the queryset is re-cloned before validate() is invoked.
    queryset = DistinctCountsSearchQuerySet(index=CourseRunDocument._index._name)
    field_name = 'pacing_type'
    terms_facet = TermsFacet(field=field_name, order='term')
    filter_bucket = queryset.aggs.bucket(
        '_filter_' + field_name, 'filter', filter=ESDSLQ('match_all'))
    filter_bucket.bucket(field_name, terms_facet.get_aggregation())

    queryset = queryset._clone(klass=DistinctCountsSearchQuerySet)
    queryset.aggregation_key = 'aggregation_key'

    with pytest.raises(RuntimeError) as err:
        queryset.validate()
    assert 'only supports a limited set of field facet options' in str(err.value)
def test_enrollable_paid_seat_boosting(self, has_enrollable_paid_seats, paid_seat_enrollment_end, expects_boost):
    """ Verify that CourseRuns for which an unenrolled user may enroll and purchase a paid Seat are boosted. """
    # Control record: no enrollable paid seats, never boosted.
    with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=False):
        with patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=None):
            self.build_normalized_course_run(title='test1')

    # Test record: boosted or not depending on the parametrized inputs.
    with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=has_enrollable_paid_seats):
        with patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=paid_seat_enrollment_end):
            test_record = self.build_normalized_course_run(title='test2')

    hits = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()
    assert len(hits) == 2

    if expects_boost:
        # The boosted record ranks first with a strictly higher score.
        assert hits[0].meta['score'] > hits[1].meta['score']
        assert test_record.title == hits[0].title
    else:
        # Without the boost the two records score identically.
        assert hits[0].meta['score'] == hits[1].meta['score']
def test_enrollable_course_run_boosting(self, enrollment_start, enrollment_end, expects_boost):
    """Verify that enrollable CourseRuns are boosted."""
    # Control record that should never be boosted.
    self.build_normalized_course_run(title='test1')
    # Test record whose enrollability is driven by the parametrized window.
    test_record = self.build_normalized_course_run(
        title='test2',
        enrollment_start=enrollment_start,
        enrollment_end=enrollment_end)

    hits = CourseRunDocument.search().query(ESDSLQ('match_all')).execute()
    assert len(hits) == 2

    if expects_boost:
        # The enrollable record ranks first with a strictly higher score.
        assert hits[0].meta['score'] > hits[1].meta['score']
        assert test_record.title == hits[0].title
    else:
        # Without the boost the two records score identically.
        assert hits[0].meta['score'] == hits[1].meta['score']
def get_results(self, query, partner):
    """Return the top matching course runs and programs for a typeahead query."""
    run_should = [
        ESDSLQ('match', title__edge_ngram_completion=query),
        ESDSLQ('match', title__suggest=query),
        ESDSLQ('match', title=query),
        ESDSLQ('match', course_key=query),
        ESDSLQ('match', authoring_organizations__edge_ngram_completion=query),
    ]
    run_query = ESDSLQ(
        'bool',
        minimum_should_match=1,
        should=run_should,
        filter=[
            ESDSLQ('term', published=True),
            ESDSLQ('term', partner=partner.short_code)
        ],
        must_not=ESDSLQ('term', hidden=True),
    )
    course_runs = search_documents.CourseRunDocument.search().query(run_query)

    # Deduplicate by course key, keeping the first RESULT_COUNT distinct courses.
    seen_course_keys = set()
    course_run_list = []
    for course_run in course_runs:
        course_key = course_run.course_key
        if course_key in seen_course_keys:
            continue
        seen_course_keys.add(course_key)
        course_run_list.append(course_run)
        if len(course_run_list) == self.RESULT_COUNT:
            break

    program_should = [
        ESDSLQ('match', title__edge_ngram_completion=query),
        ESDSLQ('match', title__suggest=query),
        ESDSLQ('match', title=query),
        ESDSLQ('match', authoring_organizations__edge_ngram_completion=query),
    ]
    program_query = ESDSLQ(
        'bool',
        minimum_should_match=1,
        should=program_should,
        filter=[
            ESDSLQ('term', status=ProgramStatus.Active),
            ESDSLQ('term', partner=partner.short_code)
        ],
        must_not=[ESDSLQ('term', hidden=True)],
    )
    programs = search_documents.ProgramDocument.search().query(program_query)
    programs = programs[:self.RESULT_COUNT]

    return course_run_list, programs
def test_fetch_all_courses(self):
    """Verify a match_all query returns every indexed course."""
    hits = CourseDocument.search().query(ESDSLQ('match_all')).execute()
    assert len(hits) == self.total_courses
def test_fetch_courses_when_no_dsl_load_per_query_settings(self):
    """Verify the default page size is used when ELASTICSEARCH_DSL_LOAD_PER_QUERY is unset."""
    del settings.ELASTICSEARCH_DSL_LOAD_PER_QUERY
    hits = CourseDocument.search().query(ESDSLQ('match_all')).execute()
    assert len(hits) == DEFAULT_SIZE
def test_fetch_courses_with_specific_size(self):
    """Verify slicing the search limits the number of returned hits."""
    desired_size = 25
    sliced_search = CourseDocument.search().query(ESDSLQ('match_all'))[:desired_size]
    hits = sliced_search.execute()
    assert len(hits) == desired_size