class QASearch(FacetedSearch):
    """Faceted search over questions that also matches on their answers.

    Facets are offered on ``tags`` (top 5 terms) and on ``creation_date``
    bucketed by month.
    """

    doc_types = [Question]
    index = settings.ES_INDEX
    fields = ['tags', 'title', 'body']

    facets = {
        'tags': TermsFacet(field='tags', size=5),
        'months': DateHistogramFacet(
            field='creation_date',
            interval='month',
            min_doc_count=0,
        ),
    }

    def query(self, search, query):
        """Attach the full-text part of the request to ``search``.

        An empty ``query`` leaves ``search`` untouched.  Otherwise a question
        matches when its own tags/title/body match (tags weighted ``^10``) or
        when one of its ``answer`` children matches on ``body``.  The combined
        query is wrapped in a ``function_score`` driven by the ``rating``
        field.
        """
        if not query:
            return search

        # Highlight settings for the single answer returned as an inner hit.
        answer_highlight = {
            "pre_tags": ["[[["],
            "post_tags": ["]]]"],
            'fields': {
                'body': {'fragment_size': 30},
            },
        }

        in_question = Q(
            'multi_match',
            fields=['tags^10', 'title', 'body'],
            query=query,
        )
        in_answers = Q(
            'has_child',
            type='answer',
            query=Q('match', body=query),
            inner_hits={
                'highlight': answer_highlight,
                '_source': False,
                'size': 1,
            },
        )

        rating_factor = SF('field_value_factor', field='rating')
        return search.query(
            'function_score',
            query=in_question | in_answers,
            functions=[rating_factor],
        )

    def highlight(self, search):
        """Return ``search`` as-is; no highlight settings are added here."""
        return search
class DocumentSearch(FacetedSearch):
    """Faceted search over ``document`` entries, matching title and abstract.

    ``fields`` carries the per-field boosts used by :meth:`query`
    (title ``^5``, abstract ``^3``).
    """

    doc_types = ["document"]
    index = settings.ES_INDEX
    fields = ['title^5', 'abstract^3']

    facets = {
        'keywords': TermsFacet(field='keywords.keyword', size=5),
        'languages': TermsFacet(field='languages.keyword', size=10),
        'education_levels': TermsFacet(
            field='education_levels.keyword', size=10),
        'communities': TermsFacet(field='communities.keyword', size=10),
        # NOTE(review): the facet is named for a year but buckets by month --
        # confirm the interval is intended.
        'year_of_available': DateHistogramFacet(
            field='year_of_available', interval='month', min_doc_count=0),
        'document_type': TermsFacet(field='document_type.keyword', size=10),
        'document_file_type': TermsFacet(
            field='document_file_type.keyword', size=10),
        # NOTE(review): unlike its siblings this facet does not target a
        # ``.keyword`` sub-field -- confirm against the index mapping.
        'document_authors': TermsFacet(field='document_authors', size=10),
        'license_type': TermsFacet(field='license_type.keyword', size=10),
        'collections': TermsFacet(field='collections.keyword', size=10),
    }

    def query(self, search, query):
        """Attach the full-text part of the request to ``search``.

        An empty ``query`` leaves ``search`` untouched.  Otherwise the text is
        matched against the fields declared on the class, so that a hit in
        the title outweighs a hit in the abstract.

        The previous implementation wrapped the match in a ``function_score``
        with ``field_value_factor`` on ``title``.  That function reads a
        numeric value from the document; ``title`` is a full-text field, so
        Elasticsearch rejects the request.  Title relevance is expressed
        through the ``title^5`` boost instead.
        """
        if not query:
            return search

        return search.query('multi_match', fields=self.fields, query=query)

    def highlight(self, search):
        """Return ``search`` as-is; no highlight settings are added here."""
        return search
def get_configured_facets():
    """Build the facet mapping described by the site configuration.

    ``configuration.facets`` is read as a whitespace-separated list of facet
    specs.  Each spec takes one of these forms:

    * ``field`` -- terms facet on ``field.keyword``.
    * ``field,<interval>`` -- date histogram when ``<interval>`` is listed in
      ``DATE_INTERVALS``; otherwise a numeric histogram when it parses as an
      integer.
    * ``field,name:lo-hi,name:lo-hi[,...]`` -- range facet.  At least two
      ranges are needed for a spec to be read as a range facet.  A bound of
      ``none`` (any case) leaves that end open.

    Specs that cannot be parsed are skipped rather than raising, so one bad
    entry does not take down every facet.  Range entries must have exactly
    one name and exactly two bounds; anything else (including negative
    numbers, which collide with the ``-`` separator) is dropped instead of
    being handed to ``RangeFacet`` in a malformed shape.

    Returns:
        dict: facet name -> facet instance.  ``DEFAULT_FACETS`` is returned
        when there is no configuration or it lists no facets.
    """
    configuration = get_configuration()
    # ``facets`` may be absent or unset (None); both mean "use the defaults".
    raw_facets = getattr(configuration, 'facets', None) if configuration else None
    facet_specs = raw_facets.split() if raw_facets else None
    if not facet_specs:
        return DEFAULT_FACETS

    configured_facets = {}
    for spec in facet_specs:
        if ',' not in spec:
            configured_facets[spec] = TermsFacet(field=spec + '.keyword')
            continue

        field, interval = spec.split(',', 1)
        if ',' in interval:
            ranges = _parse_facet_ranges(interval)
            # A range aggregation without ranges is useless; skip the facet.
            if ranges:
                configured_facets[field] = RangeFacet(field=field, ranges=ranges)
        elif interval in DATE_INTERVALS:
            configured_facets[field] = DateHistogramFacet(
                field=field, interval=interval)
        else:
            try:
                step = int(interval)
            except ValueError:
                # Neither a known date interval nor a number: ignore the spec.
                continue
            configured_facets[field] = HistogramFacet(field=field, interval=step)
    return configured_facets


def _parse_facet_ranges(ranges_spec):
    """Parse ``name:lo-hi,name:lo-hi`` into ``[(name, (lo, hi)), ...]``.

    Entries without a ``name:`` prefix, without exactly two bounds, or with a
    bound that is neither an integer nor ``none`` are left out of the result.
    """
    ranges = []
    for entry in ranges_spec.split(','):
        name, separator, bounds_spec = entry.partition(':')
        if not separator:
            continue

        bound_tokens = bounds_spec.split('-')
        if len(bound_tokens) != 2:
            continue

        try:
            bounds = tuple(
                None if token.lower() == 'none' else int(token)
                for token in bound_tokens
            )
        except ValueError:
            continue
        ranges.append((name, bounds))
    return ranges
class ArticlesList(List):
    """Declares the filter, search, facet, suggest, sort and highlight
    fields accepted for the articles list."""

    # --- identifier filters -------------------------------------------------
    id = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_QUERY_GT,
            constants.LOOKUP_QUERY_GTE,
            constants.LOOKUP_QUERY_LT,
            constants.LOOKUP_QUERY_LTE,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
        ],
    )
    ids = fields.IdsSearchField()

    # --- full-text search on translated fields ------------------------------
    title = fields.SearchFilterField(search_i18n_fields=['title'])
    notes = fields.SearchFilterField(search_i18n_fields=['notes'])
    q = fields.SearchFilterField(
        search_i18n_fields=['title', 'notes', 'datasets.title'],
    )

    # --- attribute filters --------------------------------------------------
    tags = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
            constants.LOOKUP_QUERY_CONTAINS,
        ],
        translated=True,
    )
    author = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
            constants.LOOKUP_QUERY_CONTAINS,
            constants.LOOKUP_QUERY_STARTSWITH,
            constants.LOOKUP_QUERY_ENDSWITH,
        ],
    )
    slug = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
            constants.LOOKUP_QUERY_STARTSWITH,
            constants.LOOKUP_QUERY_ENDSWITH,
        ],
    )
    # Nested under the ``category`` path, filtering on ``category.id``.
    category = fields.NestedFilteringField(
        'category',
        field_name='category.id',
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_QUERY_GT,
            constants.LOOKUP_QUERY_GTE,
            constants.LOOKUP_QUERY_LT,
            constants.LOOKUP_QUERY_LTE,
            constants.LOOKUP_QUERY_IN,
        ],
    )

    # --- facets, suggestions, ordering, highlighting ------------------------
    facet = fields.FacetedFilterField(
        facets={
            'tags': TermsFacet(field='tags', size=500),
            'modified': DateHistogramFacet(
                field='modified',
                interval='month',
                size=500,
            ),
        },
    )
    title_suggest = fields.SuggesterFilterField(
        field='title.suggest',
        suggesters=[
            constants.SUGGESTER_COMPLETION,
            constants.SUGGESTER_PHRASE,
            constants.SUGGESTER_TERM,
        ],
    )
    # Newest modification first unless the request says otherwise.
    sort = fields.OrderingFilterField(
        default_ordering=['-modified'],
        ordering_fields={
            "id": "id",
            "title": "title.{lang}.sort",
            "modified": "modified",
            "created": "created",
        },
    )
    # Matches in title and notes are wrapped in <em>...</em>.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'title': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
            'notes': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
        },
    )

    class Meta:
        strict = True
class TestFacetedFilterField(object):
    """Checks the aggregations ``FacetedFilterField`` adds to a queryset."""

    test_field_name = "faceted_filter_field"

    @pytest.mark.parametrize(
        'facets, context, aggs_query',
        [
            # No facets configured: nothing is aggregated.
            (None, ['date'], {}),
            # Requested facet name is not among the configured ones.
            (
                {
                    'status': TermsFacet(field='status'),
                    'date': DateHistogramFacet(field='date', interval='year'),
                    'range': RangeFacet(
                        field='height',
                        ranges=[("few", (None, 2)), ("lots", (2, None))],
                    ),
                },
                ['unknown'],
                {},
            ),
            # Terms facet.
            (
                {'status': TermsFacet(field='status')},
                ['status'],
                {
                    'aggs': {
                        '_filter_status': {
                            'aggs': {
                                'status': {'terms': {'field': 'status'}},
                            },
                            'filter': {'match_all': {}},
                        },
                    },
                },
            ),
            # Date histogram facet.
            (
                {'date': DateHistogramFacet(field='date', interval='year')},
                ['date'],
                {
                    'aggs': {
                        '_filter_date': {
                            'aggs': {
                                'date': {
                                    'date_histogram': {
                                        'field': 'date',
                                        'interval': 'year',
                                        'min_doc_count': 0,
                                    },
                                },
                            },
                            'filter': {'match_all': {}},
                        },
                    },
                },
            ),
            # Range facet with one open-ended bound on each side.
            (
                {
                    'range': RangeFacet(
                        field='height',
                        ranges=[("few", (None, 2)), ("lots", (2, None))],
                    ),
                },
                ['range'],
                {
                    'aggs': {
                        '_filter_range': {
                            'aggs': {
                                'range': {
                                    'range': {
                                        'field': 'height',
                                        'keyed': False,
                                        'ranges': [
                                            {'key': 'few', 'to': 2},
                                            {'key': 'lots', 'from': 2},
                                        ],
                                    },
                                },
                            },
                            'filter': {'match_all': {}},
                        },
                    },
                },
            ),
        ],
    )
    def test_queryset(self, facets, context, aggs_query, es_dsl_queryset):
        """The prepared queryset serialises to exactly ``aggs_query``."""
        expected = dict(aggs_query)
        field = FacetedFilterField(
            facets=facets,
            field_name=self.test_field_name,
        )

        prepared = field.prepare_queryset(es_dsl_queryset, context)

        actual = prepared.to_dict()
        assert actual == expected
from plone.app.contentlisting.interfaces import IContentListingObject from Products.CMFPlone.PloneBatch import Batch from Products.CMFPlone.utils import getToolByName from Products.Five.browser import BrowserView from collective.es.index.esproxyindex import SEARCH_FIELDS from collective.es.index.esproxyindex import BATCH_SIZE from collective.es.index.utils import get_configuration from collective.es.index.utils import get_query_client from collective.es.index.utils import index_name DEFAULT_FACETS = { 'subjects': TermsFacet(field='subjects.keyword'), 'review_state': TermsFacet(field='review_state.keyword'), 'modified': DateHistogramFacet(field='modified', interval='month'), } DATE_INTERVALS = ['month', 'week', 'day', 'hour'] DATE_FORMATS = { 'month': '%B %Y', 'week': 'Week of %b %-d, %Y', 'day': '%B %-d, %Y', 'hour': '%b %-d %-I %p', } def get_configured_facets(): facets = None configuration = get_configuration()
class PublicationSearch(FacetedSearch):
    """Faceted search over publications in ``offenegesetze_publications``.

    ``equivalences`` pairs up facets (``year`` and ``date``) so that a filter
    on one is also left out when aggregating the other.
    """

    doc_types = [Publication]
    index = 'offenegesetze_publications'
    fields = ['title^3', 'content']

    equivalences = {
        'year': {'date'},
        'date': {'year'},
    }

    facets = {
        'kind': TermsFacet(field='kind'),
        'year': NumberRangeFacet(field='year'),
        'page': NumberRangeFacet(field='page'),
        'number': NumberRangeFacet(field='number'),
        'date': DateHistogramFacet(field='date', interval='year'),
    }

    def __getitem__(self, n):
        """Slice the underlying search in place and return ``self``."""
        assert isinstance(n, slice)
        sliced = self._s[n]
        self._s = sliced
        return self

    def aggregate(self, search):
        """Add one filtered aggregation per facet to ``search``.

        Each facet is aggregated under every active filter except its own and
        those of the facets declared equivalent to it.
        """
        for facet_name, facet in self.facets.items():
            aggregation = facet.get_aggregation()
            # Filters on these fields must not narrow this facet's buckets.
            excluded = self.equivalences.get(facet_name, set())

            agg_filter = Q('match_all')
            for filter_field, active_filter in self._filters.items():
                if filter_field == facet_name or filter_field in excluded:
                    continue
                agg_filter &= active_filter

            wrapper = search.aggs.bucket(
                '_filter_' + facet_name,
                'filter',
                filter=agg_filter,
            )
            wrapper.bucket(facet_name, aggregation)

    def add_sort(self, *sort_args):
        """Remember ``sort_args`` and apply them to the underlying search."""
        self._sort = sort_args
        self._s = self._s.sort(*sort_args)

    def add_pagination_filter(self, filter_kwargs):
        """Restrict the underlying search with a ``range`` filter."""
        self._s = self._s.filter('range', **filter_kwargs)

    def query(self, search, query):
        """Attach the full-text part of the request to ``search``.

        A falsy ``query`` leaves ``search`` untouched; otherwise a lenient
        ``simple_query_string`` over ``self.fields`` is added, with terms
        combined by ``and``.
        """
        if not query:
            return search

        return search.query(
            "simple_query_string",
            query=query,
            fields=self.fields,
            default_operator='and',
            lenient=True,
        )
class ArticlesList(List):
    """Declares the filter, search, facet, suggest, sort and highlight
    fields accepted for the articles list."""

    # --- identifier filters -------------------------------------------------
    id = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_QUERY_GT,
            constants.LOOKUP_QUERY_GTE,
            constants.LOOKUP_QUERY_LT,
            constants.LOOKUP_QUERY_LTE,
            constants.LOOKUP_QUERY_IN,
        ],
    )
    ids = fields.IdsSearchField()

    # --- full-text search ---------------------------------------------------
    q = fields.SearchFilterField(
        search_fields=['title', 'notes', 'author', 'tags', 'datasets.title'],
    )

    # --- attribute filters --------------------------------------------------
    tags = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
        ],
    )
    author = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
        ],
    )

    # --- facets, suggestions, ordering, highlighting ------------------------
    facet = fields.FacetedFilterField(
        facets={
            'tags': TermsFacet(field='tags', size=500),
            'modified': DateHistogramFacet(
                field='modified',
                interval='month',
                size=500,
            ),
        },
    )
    title_suggest = fields.SuggesterFilterField(
        field='title.suggest',
        suggesters=[
            constants.SUGGESTER_COMPLETION,
            constants.SUGGESTER_PHRASE,
            constants.SUGGESTER_TERM,
        ],
    )
    # Newest modification first unless the request says otherwise.
    sort = fields.OrderingFilterField(
        default_ordering=['-modified'],
        ordering_fields={
            "id": "id",
            "title": "title.raw",
            "modified": "modified",
            "created": "created",
        },
    )
    # Matches in title and notes are wrapped in <em>...</em>.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'title': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
            'notes': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True,
            },
        },
    )

    class Meta:
        strict = True
def _data(self, request, cleaned, *args, explain=None, **kwargs): m_search = MultiSearch() search = Search(using=connection, index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME, extra={'size': 0}) search.aggs.bucket( 'documents_by_type', TermsFacet(field='model').get_aggregation()).bucket( 'by_month', DateHistogramFacet(field='created', interval='month', min_doc_count=0).get_aggregation()) d_search = DatasetDocument().search().extra(size=0).filter( 'match', status='published') r_search = ResourceDocument().search().extra(size=0).filter( 'match', status='published') d_search.aggs.bucket( 'datasets_by_institution', NestedFacet( 'institution', TermsFacet(field='institution.id')).get_aggregation()) d_search.aggs.bucket( 'datasets_by_categories', NestedFacet( 'categories', TermsFacet(field='categories.id', min_doc_count=1, size=50)).get_aggregation()) d_search.aggs.bucket( 'datasets_by_category', NestedFacet( 'category', TermsFacet(field='category.id', min_doc_count=1, size=50)).get_aggregation()) d_search.aggs.bucket('datasets_by_tag', TermsFacet(field='tags').get_aggregation()) d_search.aggs.bucket( 'datasets_by_keyword', Nested(aggs={ 'inner': Filter( aggs={'inner': Terms(field='keywords.name')}, term={'keywords.language': get_language()}, ) }, path='keywords')) d_search.aggs.bucket('datasets_by_formats', TermsFacet(field='formats').get_aggregation()) d_search.aggs.bucket( 'datasets_by_openness_scores', TermsFacet(field='openness_scores').get_aggregation()) r_search.aggs.bucket('resources_by_type', TermsFacet(field='type').get_aggregation()) m_search = m_search.add(search) m_search = m_search.add(d_search) m_search = m_search.add(r_search) if explain == '1': return m_search.to_dict() try: resp1, resp2, resp3 = m_search.execute() # TODO: how to concatenate two responses in more elegant way? 
resp1.aggregations.datasets_by_institution = resp2.aggregations.datasets_by_institution resp1.aggregations.datasets_by_categories = resp2.aggregations.datasets_by_categories resp1.aggregations.datasets_by_category = resp2.aggregations.datasets_by_category resp1.aggregations.datasets_by_tag = resp2.aggregations.datasets_by_tag resp1.aggregations.datasets_by_keyword = resp2.aggregations.datasets_by_keyword resp1.aggregations.datasets_by_formats = resp2.aggregations.datasets_by_formats resp1.aggregations.datasets_by_openness_scores = resp2.aggregations.datasets_by_openness_scores resp1.aggregations.resources_by_type = resp3.aggregations.resources_by_type return resp1 except TransportError as err: try: description = err.info['error']['reason'] except KeyError: description = err.error raise falcon.HTTPBadRequest(description=description)
msg = 'Search Engine unknown error: {}'.format(e) output['error'] = msg return output all_facets = { 'subjects': TermsFacet(field='metadata_json.subjects.subject.raw'), 'creators': TermsFacet(field='metadata_json.creators.creatorName.raw'), 'publicationYear': TermsFacet(field='metadata_json.publicationYear'), 'publisher': TermsFacet(field='metadata_json.publisher.raw'), 'collectedStartDate': DateHistogramFacet(field='metadata_json.dates.date.gte', interval="month"), 'collectedEndDate': DateHistogramFacet(field='metadata_json.dates.date.lte', interval="month"), } class MetadataSearch(FacetedSearch): doc_types = [ Metadata, ] date_query = { 'simple_query_string': { 'fields': ['metadata_json.dates.dateType'], 'query': 'Collected' }