class EmailSearch(FacetedSearch):
    """Faceted search over ``Email`` documents.

    Free-text queries are matched against ``fields``; ``facets`` declares the
    bucket aggregations exposed for drilling down.
    """

    doc_types = [Email]

    # Fields matched by the free-text query.
    fields = ['Subject', 'Body', 'Name']

    # Bucket aggregations, kept in declaration order.
    facets = collections.OrderedDict([
        ('party', TermsFacet(field='party')),
        ('role_type', TermsFacet(field='role_type')),
        ('state', TermsFacet(field='state', size=60)),
        ('gender', TermsFacet(field='gender')),
        ('name', TermsFacet(field='name', size=50)),
        ('publish_month', DateHistogramFacet(field='Date', interval='month')),
    ])

    def filter(self, search):
        """Apply the selected facet filters with ``filter``.

        Overrides the default behaviour, which uses ``post_filter``.
        """
        combined = Q('match_all')
        for facet_filter in itervalues(self._filters):
            combined &= facet_filter
        return search.filter(combined)

    def query(self, search, query):
        """Match ``query`` against ``fields`` with AND semantics, newest first.

        An empty query leaves ``search`` untouched.
        """
        if not query:
            return search
        matched = search.query(
            'multi_match',
            fields=self.fields,
            query=query,
            operator='and',
        )
        return matched.sort('-Date')
class PageSearchBase(RTDFacetedSearch):
    """Faceted search over ``PageDocument`` with project/version facets."""

    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
    }
    doc_types = [PageDocument]
    index = PageDocument._doc_type.index
    # Boosted fields: title outranks headers, which outrank content.
    fields = ['title^10', 'headers^5', 'content']

    def query(self, search, query):
        """Add a bool/should of AND and OR ``SimpleQueryString`` clauses.

        Both operators are queried: a document satisfying the AND clause also
        satisfies the OR clause, so it matches both ``should`` branches and is
        expected to score higher than an OR-only match.
        """
        search = super().query(search, query)

        clauses = [
            SimpleQueryString(
                query=query,
                fields=self.fields,
                default_operator=operator,
            )
            for operator in ('AND', 'OR')
        ]

        # ``should`` returns results where either clause matches.
        return search.query(Bool(should=clauses))
class FuzzySearch(FacetedSearch):
    """Faceted search over ``TargetIndex`` using a fuzzy multi-match query."""

    doc_types = [TargetIndex]

    # Fields matched by the fuzzy query.
    fields = [
        'target',
        'annotation',
        'species',
        'status',
        'clone',
        'protein',
        'genePage',
        'uniprot',
        'taxonClass',
        'superkingdom',
        'targetRole',
        'batch',
        'community',
    ]

    # Bucket aggregations exposed as facets.
    facets = {
        'superkingdom': TermsFacet(field='superkingdom'),
        'current_status': TermsFacet(field='status'),
        'clone_availability': TermsFacet(field='clone'),
        'protein_availability': TermsFacet(field='protein'),
    }

    def query(self, search, search_terms):
        """Attach an OR-combined fuzzy ``MultiMatch`` for ``search_terms``.

        Empty ``search_terms`` leave ``search`` unchanged.
        """
        if not search_terms:
            return search

        # ``type='cross_fields'`` and ``transpositions=True`` were disabled in
        # the original and stay disabled here.
        fuzzy_match = MultiMatch(
            query=search_terms,
            fields=self.fields,
            analyzer='standard',
            operator='or',
            fuzziness='AUTO',
            prefix_length=3,
            max_expansions=100,
        )
        return search.query(fuzzy_match)

# NOTE(review): the original snippet ended with a dangling ''' that opened a
# commented-out region not included here; it has been dropped.
class LabelSearch(BaseFacetedSearch):
    """Faceted search over ``LabelDocument``."""

    doc_types = [LabelDocument]

    # Fields matched by the free-text query.
    fields = ['tags', 'name']

    # (name, facet) pairs; the decade buckets are ranges over ``year_start``.
    facets = [
        ('tags', TermsFacet(field='tags', size=100)),
        ('country', TermsFacet(field='country', size=500, order={'_key': 'asc'})),
        ('type', TermsFacet(field='type', size=20, order={'_key': 'asc'})),
        (
            'established',
            RangeFacet(
                field='year_start',
                ranges=[
                    ("Before 1940's", (0, 1940)),
                    ("40's", (1940, 1950)),
                    ("50's", (1950, 1960)),
                    ("60's", (1960, 1970)),
                    ("70's", (1970, 1980)),
                    ("80's", (1980, 1990)),
                    ("90's", (1990, 2000)),
                    ("2000's", (2000, 2010)),
                    ("2010's", (2010, 2020)),
                    # NOTE(review): "This Year" is pinned to 2018-2019 -
                    # confirm whether this should track the current year.
                    ('This Year', (2018, 2019)),
                ],
            ),
        ),
    ]
class PhotoSearch(FacetedSearch):
    """Faceted search over the ``photos`` index."""

    index = 'photos'
    doc_types = [Photo]

    # Fields matched by the free-text query.
    fields = ['persons', 'file_name']

    facets = {
        'persons': TermsFacet(field='persons.raw', size=100),
        'person_count': TermsFacet(field='person_count', size=20),
        # Tags are terms of ``file_name``; the listed terms are excluded from
        # the buckets.
        'tags': TermsFacet(
            field="file_name",
            size=50,
            exclude=[
                "agr's",
                "place",
                "d",
                "мои",
                "рисунки",
                "фотографии",
                "jpg",
                "raw",
                "и",
                "с",
                "c",
                "у",
                "для",
                "по",
                "из",
                "на",
                "в",
            ],
        ),
    }

    def query(self, search, query):
        """Attach an AND-by-default ``simple_query_string`` for ``query``.

        An empty query leaves ``search`` unchanged.
        """
        if not query:
            return search
        return search.query(
            "simple_query_string",
            fields=self.fields,
            query=query,
            default_operator='and',
        )

    def highlight(self, search):
        """Return ``search`` as-is; no highlighting is added."""
        return search
def get_elasticsearch_facet(es, elasticsearch_field_name, field_type,
                            time_series_vals):
    """Build the facet for one Elasticsearch field.

    ``'text'`` fields get a terms facet on the ``.keyword`` sub-field,
    ``'boolean'`` fields a plain terms facet, and every other type is assumed
    numeric and gets a histogram facet. If ``_maybe_get_nested_facet`` returns
    a truthy value for the field, that value is returned instead.
    """
    if field_type == 'text':
        # Aggregate on the ".keyword" field rather than the analyzed term
        # field. See
        # https://www.elastic.co/guide/en/elasticsearch/reference/6.2/fielddata.html#before-enabling-fielddata
        # TODO we will need to use paging if >1000
        facet = TermsFacet(field=elasticsearch_field_name + '.keyword',
                           size=1000)
    elif field_type == 'boolean':
        facet = TermsFacet(field=elasticsearch_field_name)
    else:
        # Assume numeric type. For time series, the bucket interval is
        # computed on the name with its last dotted component removed.
        base_field_name = (elasticsearch_field_name.rsplit('.', 1)[0]
                           if time_series_vals else elasticsearch_field_name)
        bucket_interval = get_bucket_interval(es, base_field_name,
                                              time_series_vals)
        # TODO: When https://github.com/elastic/elasticsearch/issues/31828
        # is fixed, use AutoHistogramFacet instead.
        facet = HistogramFacet(field=elasticsearch_field_name,
                               interval=bucket_interval)

    wrapped = _maybe_get_nested_facet(elasticsearch_field_name, facet)
    return wrapped or facet
def test_run_executes_the_query_and_caches_the_results(self):
    """ Verify that run executes the query and caches the results."""
    course_1 = CourseFactory()
    run_1 = CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course_1)
    run_2 = CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course_1)

    course_2 = CourseFactory()
    run_3 = CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course_2)
    # This run has a different title, so the term filter below excludes it.
    CourseRunFactory(title='bar', pacing_type='instructor_paced', hidden=False, course=course_2)

    queryset = DistinctCountsSearchQuerySet(
        index=CourseRunDocument._index._name).filter('term', title='foo')
    queryset.aggregation_key = 'aggregation_key'

    # Register one field facet (pacing_type) and one query facet (hidden).
    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field)
    queryset.aggs.bucket(
        '_filter_' + facet_field, 'filter', filter=agg_filter,
    ).bucket(facet_field, agg.get_aggregation())
    queryset.aggs.bucket(
        '_query_{0}'.format('hidden'),
        'filter',
        filter=ESDSLQ('bool', filter=ESDSLQ('term', hidden=True)),
    )

    # Nothing may be cached before the query runs. The response check must
    # look at the queryset: checking the test case itself is always vacuous.
    assert queryset._distinct_result_count is None
    assert not hasattr(queryset, '_response')

    queryset.execute()

    expected_results = sorted([run_1.key, run_2.key, run_3.key])
    actual_results = sorted([run.key for run in queryset._response.hits])
    assert queryset._distinct_result_count == 2
    assert queryset._response.hits.total['value'] == 3
    assert expected_results == actual_results

    facet_counts = queryset._response.facets
    for field_val, count, distinct_count in facet_counts['fields']['pacing_type']:
        assert field_val in {'self_paced', 'instructor_paced'}
        if field_val == 'self_paced':
            assert count == 2 and distinct_count == 1
        elif field_val == 'instructor_paced':
            assert count == 1 and distinct_count == 1

    count, distinct_count = facet_counts['queries']['hidden']
    assert count == 2 and distinct_count == 1
class VideoSearch(FacetedSearch):
    """Faceted search over ``VideoDoc`` in ``settings.ES_INDEX``."""

    doc_types = [VideoDoc]
    index = settings.ES_INDEX
    fields = ['title^5', 'abstract^3']

    facets = {
        'keywords': TermsFacet(field='keywords.keyword', size=5),
        'languages': TermsFacet(field='languages.keyword', size=10),
        'education_levels': TermsFacet(field='education_levels.keyword', size=10),
        'communities': TermsFacet(field='communities.keyword', size=10),
        'year_of_available': DateHistogramFacet(
            field='year_of_available',
            interval='month',
            min_doc_count=0,
        ),
        # A monthly 'months' histogram over 'created_date' was disabled in
        # the original and stays disabled.
    }

    def query(self, search, query):
        """Wrap a title/abstract ``multi_match`` in a ``function_score`` query.

        An empty query leaves ``search`` unchanged.
        """
        if not query:
            return search

        # Note: this matches the unboosted 'title' and 'abstract' fields, not
        # the boosted ``self.fields``.
        text_match = Q('multi_match', fields=['title', 'abstract'], query=query)

        # The original also carried a disabled ``has_child`` clause for
        # 'answer' documents and a disabled ``field_value_factor`` function;
        # neither is applied, so no scoring functions are attached.
        return search.query('function_score', query=text_match)

    def highlight(self, search):
        """Return ``search`` as-is; no highlighting is added."""
        return search
class PageSearchBase(RTDFacetedSearch):
    """Declarative configuration for searching ``PageDocument``."""

    # Documents and index searched.
    doc_types = [PageDocument]
    index = PageDocument._doc_type.index

    # Boosted fields: title outranks headers, which outrank content.
    fields = ['title^10', 'headers^5', 'content']
    # Presumably read by RTDFacetedSearch when building the query - verify
    # against the base class.
    operators = ['and', 'or']

    # Bucket aggregations exposed as facets.
    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
    }
def test_facet_counts_caches_results(self):
    """ Verify that facet_counts cache results when it is forced to run the query."""
    course = CourseFactory()
    # Three 'foo' runs on one course: two hidden self-paced, one visible
    # instructor-paced.
    run_specs = [
        ('self_paced', True),
        ('self_paced', True),
        ('instructor_paced', False),
    ]
    runs = [
        CourseRunFactory(title='foo', pacing_type=pacing, hidden=is_hidden, course=course)
        for pacing, is_hidden in run_specs
    ]

    queryset = DSLFacetedSearch(index=CourseRunDocument._index._name)
    queryset = queryset.filter('term', title='foo')

    # Register one field facet (pacing_type) and one query facet (hidden).
    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field)
    field_bucket = queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=agg_filter)
    field_bucket.bucket(facet_field, agg.get_aggregation())
    hidden_filter = ESDSLQ('bool', filter=ESDSLQ('term', hidden=True))
    queryset.aggs.bucket('_query_{0}'.format('hidden'), 'filter', filter=hidden_filter)

    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)
    dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')

    # This should force the query to execute, and the results to be cached.
    facet_counts = dc_queryset.facet_counts()

    with mock.patch.object(DistinctCountsElasticsearchQueryWrapper, 'search') as mock_search:
        # A second facet_counts call must be served from the cache.
        cached_facet_counts = dc_queryset.facet_counts()
        assert not mock_search.called
        assert facet_counts == cached_facet_counts

        # The count should already have been cached by the first request.
        count = dc_queryset.count()
        assert not mock_search.called
        assert count == len(runs)

        # The hits should also have been cached by the first request.
        results = dc_queryset.execute()
        assert not mock_search.called

        expected_keys = {run.key for run in runs}
        actual_keys = {run.key for run in results}
        assert expected_keys == actual_keys
class UserProfileSearch(CommonSearch):
    """Search configuration for the ``user_profiles`` index."""

    index = 'user_profiles'
    doc_types = [UserProfile]

    # Fields used by the search.
    fields = [
        'is_superuser',
        'is_staff',
    ]

    # Facet name -> terms aggregation on the backing document field.
    # NOTE(review): 'isAdmin' and 'isStaff' both aggregate on 'is_staff' -
    # confirm this is intentional.
    facets = {
        'isSuperuser': TermsFacet(field='is_superuser'),
        'isAdmin': TermsFacet(field='is_staff'),
        'isStaff': TermsFacet(field='is_staff'),
    }
class DomainSearchBase(RTDFacetedSearch):
    """Declarative configuration for searching ``SphinxDomainDocument``."""

    # Documents and index searched.
    doc_types = [SphinxDomainDocument]
    index = SphinxDomainDocument._doc_type.index

    # Boosted fields, highest weight first.
    fields = (
        'display_name^5',
        'name^3',
        'project^3',
        'type_display',
    )
    # Presumably read by RTDFacetedSearch when building the query - verify
    # against the base class.
    operators = ['and']

    # Bucket aggregations exposed as facets.
    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
        'role_name': TermsFacet(field='role_name'),
    }
class ProfileSearch(BaseFacetedSearch):
    """Faceted search over ``ProfileDocument``."""

    doc_types = [ProfileDocument]

    # Fields matched by the free-text query.
    fields = ['tags', 'name']

    # (name, facet) pairs. The 'access_level' facet aggregates on the
    # 'groups' field.
    facets = [
        ('tags', TermsFacet(field='tags', size=100)),
        ('country', TermsFacet(field='country', size=500, order={'_key': 'asc'})),
        ('expertise', TermsFacet(field='expertise')),
        ('access_level', TermsFacet(field='groups')),
    ]
class PersonFinder(FacetedSearch):
    """Faceted search over the ``softwareprofs`` index.

    Declares the facets in use and the field(s) to search, and overrides
    ``search`` to build the query itself.
    """

    index = 'softwareprofs'
    fields = ['_all']

    # Each facet aggregates on the ``.raw`` sub-field, capped at 20 buckets.
    facets = {
        'languages': TermsFacet(field='languages.raw', size=20),
        'web': TermsFacet(field='web.raw', size=20),
        'frameworks': TermsFacet(field='frameworks.raw', size=20),
        'databases': TermsFacet(field='databases.raw', size=20),
        'platforms': TermsFacet(field='platforms.raw', size=20),
        'buildtools': TermsFacet(field='buildtools.raw', size=20),
        'editor': TermsFacet(field='editor.raw', size=20),
        'os': TermsFacet(field='os.raw', size=20),
        'containers': TermsFacet(field='containers.raw', size=20),
    }

    def search(self):
        """Return the base search with an AND ``multi_match`` on ``_all``.

        Falls back to ``match_all`` when no query text was supplied.
        """
        base = super().search()
        if self._query:
            return base.query(
                'multi_match',
                query=self._query,
                operator="AND",
                fields="_all",
            )
        return base.query('match_all')
class SourceSearch(CommonSearch):
    """Search configuration for the ``sources`` index."""

    index = 'sources'
    doc_types = [Source]

    # Fields used by the search.
    fields = [
        'source_type',
        'locale',
        'owner',
        'owner_type',
        'is_active',
        'version',
        'custom_validation_schema',
        'experimental',
        'hierarchy_meaning',
    ]

    # Facet name -> terms aggregation on the backing document field.
    facets = {
        'sourceType': TermsFacet(field='source_type'),
        'locale': TermsFacet(field='locale'),
        'owner': TermsFacet(field='owner'),
        'ownerType': TermsFacet(field='owner_type'),
        'is_active': TermsFacet(field='is_active'),
        'version': TermsFacet(field='version'),
        'customValidationSchema': TermsFacet(field='custom_validation_schema'),
        'experimental': TermsFacet(field='experimental'),
        'hierarchyMeaning': TermsFacet(field='hierarchy_meaning'),
    }
class PublicProfileSearch(FacetedSearch):
    """Faceted search over ``PublicProfile`` documents."""

    doc_types = [PublicProfile]

    # Fields handed to the term tree when it builds the query.
    fields = ['fullname', 'promo', 'sex']

    facets = {
        'promo': TermsFacet(field='promo'),
        'section': TermsFacet(field='section'),
        'sex': TermsFacet(field='sex'),
    }

    def query(self, search, termtree):
        """Attach the query produced by ``termtree.as_query(self.fields)``."""
        built_query = termtree.as_query(self.fields)
        return search.query(built_query)
class BookSearch(FacetedSearch):
    """Faceted search over ``Book`` documents."""

    doc_types = [Book]

    # Fields matched by the free-text query.
    fields = ['authors', 'name', 'genders', 'editor']

    # Terms facets must name their target with ``field=``. The previous
    # ``fields=`` keyword was passed straight through to the terms
    # aggregation, leaving it without the ``field`` parameter it needs.
    # NOTE(review): the search field is 'editor' but the facet field is
    # 'editors' - confirm which name the Book mapping uses.
    facets = {
        'authors': TermsFacet(field='authors'),
        'genders': TermsFacet(field='genders'),
        'editors': TermsFacet(field='editors'),
        'publication': DateHistogramFacet(field='publication', interval='year'),
    }
def test_facet_counts_includes_distinct_counts(self):
    """ Verify that facet_counts include distinct counts. """
    course = CourseFactory()
    # Three 'foo' runs on one course: two hidden self-paced, one visible
    # instructor-paced.
    for pacing, is_hidden in (
        ('self_paced', True),
        ('self_paced', True),
        ('instructor_paced', False),
    ):
        CourseRunFactory(title='foo', pacing_type=pacing, hidden=is_hidden, course=course)

    # Add both a field facet and a query facet so that both are exercised.
    queryset = DSLFacetedSearch(index=CourseRunDocument._index._name)
    queryset = queryset.filter('term', title='foo')

    facet_field = 'pacing_type'
    agg_filter = ESDSLQ('match_all')
    agg = TermsFacet(field=facet_field)
    field_bucket = queryset.aggs.bucket('_filter_' + facet_field, 'filter', filter=agg_filter)
    field_bucket.bucket(facet_field, agg.get_aggregation())
    hidden_filter = ESDSLQ('bool', filter=ESDSLQ('term', hidden=True))
    queryset.aggs.bucket('_query_{0}'.format('hidden'), 'filter', filter=hidden_filter)

    dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset)
    dc_queryset = dc_queryset.with_distinct_counts('aggregation_key')
    facet_counts = dc_queryset.facet_counts()

    # Field facets are a list of (field_value, count, distinct_count) tuples.
    pacing_counts = facet_counts['fields']['pacing_type']
    for val, count, distinct_count in pacing_counts:
        assert val in {'self_paced', 'instructor_paced'}
        if val == 'self_paced':
            assert count == 2
            assert distinct_count == 1
        elif val == 'instructor_paced':
            assert count == 1
            assert distinct_count == 1

    # Query facets map facet names to (count, distinct_count) tuples.
    hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
    assert hidden_count == 2
    assert hidden_distinct_count == 1
class ArtistSearch(BaseFacetedSearch):
    """Faceted search over ``ArtistDocument``."""

    doc_types = [ArtistDocument]

    # Fields matched by the free-text query.
    fields = ['tags', 'name']

    # (name, facet) pairs; country and type buckets are ordered by key.
    facets = [
        ('tags', TermsFacet(field='tags', size=100)),
        ('country', TermsFacet(field='country', size=500, order={'_key': 'asc'})),
        ('type', TermsFacet(field='type', size=20, order={'_key': 'asc'})),
    ]
class FS(FacetedSearch):
    """Faceted search over ``TargetIndex`` using a cross-field AND match."""

    doc_types = [TargetIndex]

    # Field list that includes the integer fields (targetID, maxCode) and
    # boosts; this is the list ``query`` searches.
    fieldsI = [
        'targetID',
        'target',
        'annotation^3',
        'species^2',
        'status',
        'clone',
        'protein',
        'genePage',
        'uniprot',
        'taxonClass',
        'superkingdom',
        'targetRole',
        'batch',
        'community',
        'maxCode',
    ]

    # Text-only field list. Not used by ``query``; kept because code outside
    # this class may reference it.
    fieldsS = [
        'target',
        'annotation',
        'species',
        'status',
        'clone',
        'protein',
        'genePage',
        'uniprot',
        'taxonClass',
        'superkingdom',
        'targetRole',
        'batch',
        'community',
    ]

    # Bucket aggregations exposed as facets.
    facets = {
        'superkingdom': TermsFacet(field='superkingdom'),
        'current_status': TermsFacet(field='status'),
        'clone_availability': TermsFacet(field='clone'),
        'protein_availability': TermsFacet(field='protein'),
    }

    def query(self, search, search_terms):
        """Attach a ``cross_fields`` AND ``MultiMatch`` over ``fieldsI``.

        Empty ``search_terms`` leave ``search`` unchanged. The former scan
        for digit-only words was dead code (its result was never read, and it
        shadowed the builtin ``list``), so it has been removed; the query
        built is unchanged.
        """
        if not search_terms:
            return search

        q = MultiMatch(
            query=search_terms,
            fields=self.fieldsI,
            type='cross_fields',
            analyzer='standard',
            operator='and',
        )
        return search.query(q)
def q(self, value):  # noqa: C901
    """Build ``(facet_name, facet)`` pairs for the requested facets.

    ``value`` is a comma-separated list of facet names. Names with no entry
    in ``self._metadata['aggs']`` are skipped. Each configured facet becomes
    a ``TermsFacet``, wrapped in a ``FilterFacet`` when a filter is
    configured and in a ``NestedFacet`` when a ``nested_path`` is configured.

    Returns the pairs in the order the names were requested.
    """
    res = []
    _d = self._metadata.get('aggs', {})
    lang = get_language()
    for facet_name in value.split(','):
        if facet_name not in _d:
            continue

        _f = dict(_d[facet_name])
        _field = _f['field']
        _translated = _f.get('translated', False)
        _path = _f.get('nested_path', None)

        _filter = _f.get('filter')
        if isinstance(_filter, dict):
            # Resolve callable filter values on a copy so the configured
            # metadata is not mutated. The loop variable is deliberately not
            # called ``value``, to avoid shadowing the parameter.
            _filter = _filter.copy()
            for key, filter_value in _filter.items():
                if callable(filter_value):
                    _filter[key] = filter_value()

        if _translated and _field:
            # Translated facets aggregate on the language-specific sub-field.
            _field = '{}.{}'.format(_field, lang)

        kw = {
            'size': _f.get('size', 500),
            'min_doc_count': _f.get('min_doc_count', 1),
        }
        # Optional settings are forwarded only when set to a truthy value.
        for option in ('order', 'format', 'missing'):
            option_value = _f.get(option)
            if option_value:
                kw[option] = option_value

        terms_facet = TermsFacet(field=_field, **kw)
        if _filter:
            # The filter wrapper is built only when a filter is configured.
            inner_facet = FilterFacet(
                term=_filter,
                aggs={'inner': terms_facet.get_aggregation()},
            )
        else:
            inner_facet = terms_facet

        facet = NestedFacet(_path, inner_facet) if _path else inner_facet
        res.append((facet_name, facet))
    return res
def test_with_distinct_counts_raises_when_queryset_includes_unsupported_options(self):
    """
    Verify that an error is raised if the original queryset includes options that are not supported
    by our custom Search class.
    """
    # Case 1: a date histogram facet.
    date_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    with pytest.raises(RuntimeError) as date_err:
        date_field = 'start'
        date_filter = ESDSLQ('match_all')
        date_agg = DateHistogramFacet(field=date_field, interval='month')
        date_bucket = date_queryset.aggs.bucket('_filter_' + date_field, 'filter', filter=date_filter)
        date_bucket.bucket(date_field, date_agg.get_aggregation())
        date_queryset.with_distinct_counts('aggregation_key')
    assert str(date_err.value) == 'DistinctCountsSearchQuerySet does not support date facets.'

    # Case 2: a terms facet carrying the `order` option.
    terms_queryset = DistinctCountsSearchQuerySet.from_queryset(DSLFacetedSearch())
    with pytest.raises(RuntimeError) as terms_err:
        terms_field = 'pacing_type'
        terms_filter = ESDSLQ('match_all')
        terms_agg = TermsFacet(field=terms_field, order='term')
        terms_bucket = terms_queryset.aggs.bucket('_filter_' + terms_field, 'filter', filter=terms_filter)
        terms_bucket.bucket(terms_field, terms_agg.get_aggregation())
        terms_queryset.with_distinct_counts('aggregation_key')
    expected_fragment = 'DistinctCountsSearchQuerySet only supports a limited set of field facet options.'
    assert expected_fragment in str(terms_err.value)
class FileSearch(RTDFacetedSearch):
    """Faceted search exposing project and version facets."""

    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
    }

    def query(self, search, query):
        """
        Add query part to ``search``

        Overridden because callers pass an ES Query object instead of a string.
        """
        if not query:
            return search
        return search.query(query)
class ProjectSearchBase(RTDFacetedSearch):
    """Declarative configuration for searching ``ProjectDocument``."""

    # Documents and index searched.
    doc_types = [ProjectDocument]
    index = ProjectDocument._index._name

    # Boosted fields, highest weight first.
    fields = (
        'name^10',
        'slug^5',
        'description',
    )
    # Presumably read by RTDFacetedSearch when building the query and
    # response - verify against the base class.
    operators = ['and', 'or']
    excludes = ['users', 'language']

    # Bucket aggregations exposed as facets.
    facets = {
        'language': TermsFacet(field='language'),
    }
class NovelFacetedSearch(FacetedSearch):
    """Faceted search over the ``ALIAS`` index, matching on description."""

    index = ALIAS

    # Fields that should be searched.
    fields = ['description']

    # Bucket aggregations exposed as facets.
    facets = {
        'tag': TermsFacet(field='tag.keyword', size=10),
        'genre': TermsFacet(field='genre.keyword', size=10),
        # Removed because the timezone handling looked unreliable:
        # 'created_at': DateHistogramFacet(field='created_at', interval='day', format="%Y-%m-%d"),
    }

    def highlight(self, search):
        """Highlight ``description`` with a fragment size of 300."""
        return search.highlight('description', fragment_size=300)
class SomeSchema(QuerysetTestHelper, List):
    """List schema declaring search, sort, facet and highlight fields."""

    # Free-text search over 'body'.
    q = fields.SearchFilterField(search_fields=['body'])

    # Ordering: newest 'modified' first by default; 'title' sorts on its
    # 'title.raw' field.
    sort = fields.OrderingFilterField(
        default_ordering=['-modified'],
        ordering_fields={
            "id": "id",
            "title": "title.raw",
            "modified": "modified",
            "created": "created",
        },
    )

    # A single terms facet on 'somefield'.
    facet = fields.FacetedFilterField(
        facets={
            'somefield': TermsFacet(field='somefield'),
        },
    )

    # Highlight 'somefield', wrapping matches in '*-' ... '-*'.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'somefield': {
                'options': {
                    'post_tags': ['-*'],
                    'pre_tags': ['*-'],
                },
                'enabled': True,
            },
        },
    )
class LabelSearch(FacetedSearch):
    """Faceted search over ``LabelDocument``."""

    doc_types = [LabelDocument]

    # Fields that should be searched.
    fields = ['tags', 'name']

    # Bucket aggregations exposed as facets. A 'tags' terms facet and a
    # monthly 'publishing_frequency' histogram were disabled in the original
    # and stay disabled.
    facets = {
        'country': TermsFacet(field='country'),
    }

    # A ``search`` override that added a ``publish_from <= now/h`` range
    # filter was commented out in the original and is not active.

    def query(self, search, query):
        """
        Add query part to ``search``.

        ``query`` is handed to ``search.update_from_dict``; an empty value
        leaves ``search`` unchanged.
        """
        if not query:
            return search
        return search.update_from_dict(query)
def q(self, value):
    """Build ``(facet_name, facet)`` pairs for the requested facets.

    ``value`` is a comma-separated list of facet names. Names with no entry
    in ``self._metadata['aggs']`` are skipped. Each configured facet becomes
    a ``TermsFacet``, wrapped in a ``NestedFacet`` when a ``nested_path`` is
    configured.
    """
    configured = self._metadata.get('aggs', {})
    pairs = []
    for facet_name in value.split(','):
        if facet_name not in configured:
            continue

        spec = dict(configured[facet_name])
        field_name = spec['field']
        nested_path = spec.get('nested_path', None)

        options = {
            'size': spec.get('size', 500),
            'min_doc_count': spec.get('min_doc_count', 1),
            'interval': spec.get('interval', None),
            'keyed': spec.get('keyed', False),
        }
        # Optional settings are forwarded only when set to a truthy value.
        for option_name in ('order', 'format', 'missing'):
            option_value = spec.get(option_name)
            if option_value:
                options[option_name] = option_value

        terms_facet = TermsFacet(field=field_name, **options)
        if nested_path:
            pairs.append((facet_name, NestedFacet(nested_path, terms_facet)))
        else:
            pairs.append((facet_name, terms_facet))
    return pairs
class ApplicationsList(List):
    """List schema for applications: filters, search, facets, sort, highlight."""

    # Filter on 'id': term(s), range comparisons and 'in'.
    id = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_QUERY_GT,
            constants.LOOKUP_QUERY_GTE,
            constants.LOOKUP_QUERY_LT,
            constants.LOOKUP_QUERY_LTE,
            constants.LOOKUP_QUERY_IN,
        ],
    )
    ids = fields.IdsSearchField()

    # Free-text search across the listed fields.
    q = fields.SearchFilterField(
        search_fields=['title', 'notes', 'author', 'tags', 'datasets.title'],
    )

    # 'tags' and 'author' accept the same lookups.
    tags = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
        ],
    )
    author = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
        ],
    )

    # Facets: tag terms and a monthly histogram over 'modified'.
    facet = fields.FacetedFilterField(
        facets={
            'tags': TermsFacet(field='tags', size=500),
            'modified': DateHistogramFacet(field='modified', interval='month', size=500),
        },
    )

    # Ordering: newest 'modified' first by default; 'title' sorts on its
    # 'title.raw' field.
    sort = fields.OrderingFilterField(
        default_ordering=['-modified'],
        ordering_fields={
            "id": "id",
            "title": "title.raw",
            "modified": "modified",
            "created": "created",
        },
    )

    # Highlight matches in 'title' and 'notes' with <em> tags.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'title': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
            'notes': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
        },
    )

    class Meta:
        strict = True
class ProductSearch(FacetedSearch):
    """Faceted search over the ``products`` index."""

    index = "products"
    doc_types = [Product]

    # Fields matched by the free-text query.
    fields = [
        'title',
        'description',
        'tags',
    ]

    # A single terms facet on 'tags'.
    facets = {
        'tags': TermsFacet(field='tags'),
    }