def _apply_agg(self, search_query): if self.selected: return search_query agg_filters = search_query.get_context().post_filters return search_query.aggregations( **{ self._agg_name: agg.Filter( Bool.must(*chain(agg_filters, [self._condition]))) })
def _get_expression(self, params): values = self._get_values_from_params(params.get(self.alias, {})) if not values: return None if len(values) == 1: return self.field == values[0] if self._conj_operator == QueryFilter.CONJ_AND: return Bool.must(*(self.field == v for v in values)) else: return self.field.in_(values)
def _get_expression(self, params): values = params.get(self.alias, {}).get('exact') if not values: if self.default: values = [[self.default]] if not values: return None expressions = [] for v in values: w = v[0] filter_value = self.get_value(w) if filter_value and not isinstance(filter_value.expr, MatchAll): expressions.append(filter_value.expr) if not expressions: return None if self._conj_operator == QueryFilter.CONJ_AND: return Bool.must(*expressions) else: return Bool.should(*expressions)
def _apply_filter(self, search_query, params): params = params.get(self.alias) or {} self.from_value = self._get_from_value(params) self.to_value = self._get_to_value(params) if self.from_value is None and self.to_value is None: return search_query expr = Nested( path=self.path, query=Bool.must( self.key_expression, self.value_field.range(gte=self.from_value, lte=self.to_value), ) ) return search_query.post_filter(expr, meta={'tags': {self.name}})
def _get_expression(self, params): values = self._get_values_from_params(params.get(self.alias, {})) if not values: return None if len(values) == 1: return Nested( path=self.path, query=Bool.must( self.key_expression, self.value_field == values[0] ) ) expressions = [self.key_expression] if self._conj_operator == QueryFilter.CONJ_AND: expressions.extend([self.value_field == v for v in values]) else: expressions.append(self.value_field.in_(values)) return Nested( path=self.path, query=Bool.must(*expressions) )
def test_weight():
    """Check ``Weight.to_elastic()`` with and without a filter."""
    plain = Weight(3)
    assert plain.to_elastic() == {"weight": 3}

    recent_filter = Bool.must(
        PostDocument.status.in_([0, 1]),
        PostDocument.created_date >= 'now/d-7d',
    )
    filtered = Weight(2, filter=recent_filter)
    expected = {
        "weight": 2,
        "filter": {
            "bool": {
                "must": [
                    {"terms": {"status": [0, 1]}},
                    {"range": {"created_date": {"gte": "now/d-7d"}}},
                ]
            }
        },
    }
    assert filtered.to_elastic() == expected
def _apply_agg(self, search_query):
    """Add the grouping aggregations for the current page.

    A terms aggregation on ``self.group_by`` (limited to ``per_page``
    buckets, each with a top-hits sub-aggregation) is always added.  On
    the first page a second terms aggregation limited to ``max_items`` is
    added next to it; on other pages the group values are looked up via
    ``self._get_group_values`` and appended to the post filters instead.
    When there are post filters, everything is wrapped in one filter
    aggregation.
    """
    order_aggs, order_by = self._extract_orders(search_query)

    top_hits = {self._top_hits_agg_name: agg.TopHits(**self.group_kwargs)}
    group_agg = agg.Terms(
        self.group_by,
        size=self.per_page,
        order=order_by,
        aggs=dict(top_hits, **order_aggs),
    )
    pagination_agg = agg.Terms(
        self.group_by,
        size=self.max_items,
        order=order_by,
        aggs=order_aggs,
    )

    post_filters = list(search_query.get_context().iter_post_filters())

    page_aggs = {self._agg_name: group_agg}
    if self.page == 1:
        page_aggs[self._pagination_agg_name] = pagination_agg
    else:
        group_values = self._get_group_values(
            search_query, post_filters, pagination_agg
        )
        post_filters.append(self.group_by.in_(group_values))

    if not post_filters:
        return search_query.aggs(page_aggs)

    wrapped = {
        self._filter_agg_name: agg.Filter(
            Bool.must(*post_filters), aggs=page_aggs
        )
    }
    return search_query.aggs(wrapped)
def _apply_agg(self, search_query):
    """Add the nested "enabled" and min/max aggregations to the query.

    Post filters are collected from the query context via
    ``self._get_agg_filters``, passing this filter's own name and the name
    of its query filter as the second argument.  The min/max aggregation
    is wrapped in a filter aggregation when such filters exist; the
    "enabled" aggregation is never wrapped.
    """
    other_filters = self._get_agg_filters(
        search_query.get_context().iter_post_filters_with_meta(),
        {self.qf._name, self.name},
    )

    result_aggs = {}

    if self._compute_enabled:
        # ``!= None`` is kept as an operator comparison: its result is the
        # filter expression handed to ``agg.Filter``.
        has_value = agg.Filter(self.value_field != None)  # noqa: E711
        by_key = agg.Filter(
            self.key_expression,
            aggs={self._filter_value_agg_name: has_value},
        )
        result_aggs[self._enabled_agg_name] = agg.Nested(
            path=self.path,
            aggs={self._filter_key_agg_name: by_key},
        )

    if self._compute_min_max:
        min_max = {
            self._min_agg_name: agg.Min(self.value_field),
            self._max_agg_name: agg.Max(self.value_field),
        }
        stat_aggs = {
            self._enabled_agg_name_stat: agg.Nested(
                path=self.path,
                aggs={
                    self._filter_key_agg_name: agg.Filter(
                        self.key_expression, aggs=min_max
                    )
                },
            )
        }
        if other_filters:
            result_aggs[self._filter_agg_name] = agg.Filter(
                Bool.must(*other_filters), aggs=stat_aggs
            )
        else:
            result_aggs.update(stat_aggs)

    return search_query.aggregations(**result_aggs)
def _apply_agg(self, search_query):
    """Add the terms aggregation for this facet to ``search_query``.

    Post filters are collected from the query itself via
    ``self._get_agg_filters``.  The excluded tags always contain the query
    filter's name, plus this filter's own name when the conjunction
    operator is ``CONJ_OR``.  If any filters remain, the terms aggregation
    is nested inside a filter aggregation.
    """
    skip_tags = {self.qf._name}
    if self._conj_operator == QueryFilter.CONJ_OR:
        skip_tags.add(self.name)

    other_filters = self._get_agg_filters(
        search_query.iter_post_filters_with_meta(), skip_tags
    )

    facet_aggs = {
        self._agg_name: agg.Terms(
            self.field,
            instance_mapper=self._instance_mapper,
            **self._agg_kwargs
        )
    }
    if other_filters:
        facet_aggs = {
            self._filter_agg_name: agg.Filter(
                Bool.must(*other_filters), aggs=facet_aggs
            )
        }
    return search_query.aggregations(**facet_aggs)
def _apply_agg(self, search_query):
    """Add the terms aggregation for this facet to ``search_query``.

    Post filters are collected from the query context via
    ``self._get_agg_filters``.  The excluded tags always contain the query
    filter's name, plus this filter's own name when the conjunction
    operator is ``CONJ_OR``.  If any filters remain, the terms aggregation
    is nested inside a filter aggregation.
    """
    skip_tags = {self.qf._name}
    if self._conj_operator == QueryFilter.CONJ_OR:
        skip_tags.add(self.name)

    context = search_query.get_context()
    other_filters = self._get_agg_filters(
        context.iter_post_filters_with_meta(), skip_tags
    )

    facet_aggs = {
        self._agg_name: agg.Terms(
            self.field,
            instance_mapper=self._instance_mapper,
            **self._agg_kwargs
        )
    }
    if other_filters:
        facet_aggs = {
            self._filter_agg_name: agg.Filter(
                Bool.must(*other_filters), aggs=facet_aggs
            )
        }
    return search_query.aggregations(**facet_aggs)
def _apply_agg(self, search_query):
    """Add one filter aggregation per configured filter value.

    Post filters are collected from the query context via
    ``self._get_agg_filters``.  The excluded tags always contain the query
    filter's name, plus this filter's own name when the conjunction
    operator is ``CONJ_OR``.  If any filters remain, the per-value
    aggregations are nested inside one wrapping filter aggregation.
    """
    skip_tags = {self.qf._name}
    if self._conj_operator == QueryFilter.CONJ_OR:
        skip_tags.add(self.name)

    context = search_query.get_context()
    other_filters = self._get_agg_filters(
        context.iter_post_filters_with_meta(), skip_tags
    )

    value_aggs = {
        self._make_agg_name(fv.value): agg.Filter(fv.expr, **self.agg_kwargs)
        for fv in self.values
    }
    if not other_filters:
        return search_query.aggregations(**value_aggs)

    wrapped = {
        self._filter_agg_name: agg.Filter(
            Bool.must(*other_filters), aggs=value_aggs
        )
    }
    return search_query.aggregations(**wrapped)
def _apply_agg(self, search_query):
    """Add one filter aggregation per configured filter value.

    Post filters are collected from the query itself via
    ``self._get_agg_filters``.  The excluded tags always contain the query
    filter's name, plus this filter's own name when the conjunction
    operator is ``CONJ_OR``.  If any filters remain, the per-value
    aggregations are nested inside one wrapping filter aggregation.
    """
    skip_tags = {self.qf._name}
    if self._conj_operator == QueryFilter.CONJ_OR:
        skip_tags.add(self.name)

    other_filters = self._get_agg_filters(
        search_query.iter_post_filters_with_meta(), skip_tags
    )

    value_aggs = {
        self._make_agg_name(fv.value): agg.Filter(fv.expr, **self.agg_kwargs)
        for fv in self.values
    }
    if not other_filters:
        return search_query.aggregations(**value_aggs)

    wrapped = {
        self._filter_agg_name: agg.Filter(
            Bool.must(*other_filters), aggs=value_aggs
        )
    }
    return search_query.aggregations(**wrapped)
def _apply_agg(self, search_query):
    """Add the "enabled" and min/max aggregations for ``self.field``.

    Post filters are collected from the query via
    ``self._get_agg_filters``, passing this filter's own name and the name
    of its query filter as the second argument.  The min/max aggregations
    are wrapped in a filter aggregation when such filters exist; the
    "enabled" aggregation is never wrapped.
    """
    other_filters = self._get_agg_filters(
        search_query.iter_post_filters_with_meta(),
        {self.qf._name, self.name},
    )

    result_aggs = {}

    if self._compute_enabled:
        # ``!= None`` is kept as an operator comparison: its result is the
        # filter expression handed to ``agg.Filter``.
        result_aggs[self._enabled_agg_name] = agg.Filter(
            self.field != None  # noqa: E711
        )

    if self._compute_min_max:
        min_max = {
            self._min_agg_name: agg.Min(self.field),
            self._max_agg_name: agg.Max(self.field),
        }
        if other_filters:
            result_aggs[self._filter_agg_name] = agg.Filter(
                Bool.must(*other_filters), aggs=min_max
            )
        else:
            result_aggs.update(min_max)

    return search_query.aggregations(**result_aggs)
def test_attr_bool_facet_filter__multiple_selected_values(bool_qf, compiler):
    """Two attributes selected at once, one of them with both values.

    Checks the compiled query (one shared filter aggregation, one
    aggregation per selected attribute, and the post filter) and the
    facets produced from a canned search result.
    """
    sq = bool_qf.apply(SearchQuery(), {'a1': ['true', 'false'], 'a2': 'true'})
    actual_query = sq.to_dict(compiler=compiler)

    expected_aggs = {
        'qf.attr_bool.filter': agg.Filter(
            Bool.must(
                Terms('attr.bool', [0b11, 0b10]),
                Term('attr.bool', 0b101),
            ),
            aggs={'qf.attr_bool': agg.Terms(Field('attr.bool'), size=100)},
        ),
        'qf.attr_bool.filter:1': agg.Filter(
            Term('attr.bool', 0b101),
            aggs={
                'qf.attr_bool:1': agg.Terms(
                    Field('attr.bool'), size=2, include=[0b10, 0b11]
                )
            },
        ),
        'qf.attr_bool.filter:2': agg.Filter(
            Terms('attr.bool', [0b11, 0b10]),
            aggs={
                'qf.attr_bool:2': agg.Terms(
                    Field('attr.bool'), size=2, include=[0b100, 0b101]
                )
            },
        ),
    }
    expected_sq = SearchQuery().aggs(expected_aggs).post_filter(
        Bool.must(
            Terms('attr.bool', [0b11, 0b10]),
            Term('attr.bool', 0b101),
        )
    )
    assert actual_query == expected_sq.to_dict(compiler=compiler)

    raw_result = {
        'aggregations': {
            'qf.attr_bool.filter': {
                'doc_count': 200,
                'qf.attr_bool': {
                    'buckets': [
                        {'key': 0b11, 'doc_count': 123},
                        {'key': 0b101, 'doc_count': 1},
                    ]
                },
            },
            'qf.attr_bool.filter:1': {
                'doc_count': 163,
                'qf.attr_bool:1': {
                    'buckets': [
                        {'key': 0b11, 'doc_count': 123},
                        {'key': 0b10, 'doc_count': 99},
                    ]
                },
            },
            'qf.attr_bool.filter:2': {
                'doc_count': 144,
                'qf.attr_bool:2': {
                    'buckets': [
                        {'key': 0b101, 'doc_count': 1},
                    ]
                },
            },
        }
    }
    qf_res = bool_qf.process_result(
        SearchResult(
            raw_result,
            aggregations=sq.get_context().aggregations,
        )
    )
    assert len(qf_res.attr_bool.facets) == 2

    # Attribute 1: both values selected.
    facet = qf_res.attr_bool.get_facet(1)
    assert len(facet.all_values) == 2
    assert len(facet.selected_values) == 2
    assert len(facet.values) == 0
    assert facet.all_values[0] is facet.selected_values[0]
    assert facet.all_values[1] is facet.selected_values[1]

    first, second = facet.all_values[0], facet.all_values[1]
    assert first.value is True
    assert first.count == 123
    assert first.count_text == '123'
    assert first.selected is True
    assert second.value is False
    assert second.count == 99
    assert second.count_text == '99'
    assert second.selected is True

    # Attribute 2: only the ``True`` value selected.
    facet = qf_res.attr_bool.get_facet(2)
    assert len(facet.all_values) == 1
    assert len(facet.selected_values) == 1
    assert len(facet.values) == 0
    assert facet.all_values[0] is facet.selected_values[0]

    only = facet.all_values[0]
    assert only.value is True
    assert only.count == 1
    assert only.count_text == '1'
    assert only.selected is True
def test_expression(self):
    """Check the compiled form of each query/filter expression type.

    Every case builds an expression and compares it, via
    ``self.assert_expression``, with the expected Elasticsearch structure.
    Expected term values for expressions built from ``'kimchy'`` are
    spelled out as ``"kimchy"`` (they had been replaced by a ``"******"``
    placeholder that can never equal the compiled value).
    """
    f = DynamicDocument.fields

    # --- Params -----------------------------------------------------------
    e = Params({'foo': 'bar'})
    self.assert_expression(e, {"foo": "bar"})
    self.assertEqual(e['foo'], 'bar')
    self.assertIn('foo', e)

    # --- Match / Term / Terms / Exists / Missing ----------------------------
    self.assert_expression(
        Match(f.message, 'this is a test'),
        {
            "match": {
                "message": "this is a test",
            }
        }
    )
    self.assert_expression(
        Match(
            f.message, 'this is a test',
            minimum_should_match='100%',
            cutoff_frequency=0.001,
            boost=2.1
        ),
        {
            "match": {
                "message": {
                    "query": "this is a test",
                    "minimum_should_match": "100%",
                    "cutoff_frequency": 0.001,
                    "boost": 2.1,
                }
            }
        }
    )
    self.assert_expression(
        Term(f.user, 'kimchy'),
        {
            "term": {"user": "kimchy"}
        }
    )
    self.assert_expression(
        Term(f.user, 'kimchy', boost=1.2),
        {
            "term": {"user": {"value": "kimchy", "boost": 1.2}}
        }
    )
    self.assert_expression(
        Term('user.login', 'kimchy'),
        {
            "term": {"user.login": "kimchy"}
        }
    )
    self.assert_expression(
        Terms(f.status, [0]),
        {
            "terms": {
                "status": [0]
            }
        }
    )
    self.assert_expression(
        Terms(f.tags, ['blue', 'pill'], minimum_should_match=1),
        {
            "terms": {
                "tags": ["blue", "pill"],
                "minimum_should_match": 1
            }
        }
    )
    self.assert_expression(
        Exists(f.tags),
        {
            "exists": {"field": "tags"}
        }
    )
    self.assert_expression(
        Missing(f.tags, _cache=True),
        {
            "missing": {
                "field": "tags",
                "_cache": True
            }
        }
    )

    # --- Bool ---------------------------------------------------------------
    self.assert_expression(
        Bool(
            must=Term(f.user, 'kimchy'),
            filter=Term(f.tag, 'tech'),
            must_not=Range(f.age, from_=10, to=20),
            should=[Term(f.tag, 'wow'), Term(f.tag, 'elasticsearch', boost=2.1)],
            minimum_should_match=1,
            boost=1.0,
        ),
        {
            "bool": {
                "must": {
                    "term": {"user": "kimchy"}
                },
                "filter": {
                    "term": {"tag": "tech"}
                },
                "must_not": {
                    "range": {
                        "age": {"from": 10, "to": 20}
                    }
                },
                "should": [
                    {
                        "term": {"tag": "wow"}
                    },
                    {
                        "term": {"tag": {"value": "elasticsearch", "boost": 2.1}}
                    }
                ],
                "minimum_should_match": 1,
                "boost": 1.0
            }
        }
    )

    # --- MultiMatch / Range / Boosting / Common -----------------------------
    e = MultiMatch(
        "Will Smith",
        [self.index.star.title.boost(4), self.index.star.wildcard('*_name').boost(2)],
        minimum_should_match='100%'
    )
    self.assert_expression(
        e,
        {
            "multi_match": {
                "query": "Will Smith",
                "fields": ["title^4", "*_name^2"],
                "minimum_should_match": "100%"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        {self.index.star}
    )
    self.assert_expression(
        Range(self.index.product.price, lte=100, boost=2.2, execution='index', _cache=False),
        {
            "range": {
                "price": {"lte": 100, "boost": 2.2},
                "execution": "index",
                "_cache": False
            }
        }
    )
    self.assert_expression(
        Boosting(
            positive=Term(f.field1, 'value1'),
            negative=Term(f.field2, 'value2'),
            negative_boost=0.2
        ),
        {
            "boosting": {
                "positive": {
                    "term": {
                        "field1": "value1"
                    }
                },
                "negative": {
                    "term": {
                        "field2": "value2"
                    }
                },
                "negative_boost": 0.2
            }
        }
    )
    self.assert_expression(
        Common(
            f.body, 'nelly the elephant not as a cartoon',
            cutoff_frequency=0.001,
            minimum_should_match=dict(low_freq=2, high_freq=3),
        ),
        {
            "common": {
                "body": {
                    "query": "nelly the elephant not as a cartoon",
                    "cutoff_frequency": 0.001,
                    "minimum_should_match": {
                        "low_freq": 2,
                        "high_freq": 3
                    }
                }
            }
        }
    )

    # --- ConstantScore / FunctionScore / DisMax / Filtered ------------------
    self.assert_expression(
        ConstantScore(filter=Term(f.user, 'kimchy'), boost=1.2),
        {
            "constant_score": {
                "filter": {
                    "term": {"user": "kimchy"}
                },
                "boost": 1.2
            }
        }
    )
    self.assert_expression(
        FunctionScore(
            query=MatchAll(),
            field_value_factor={
                'field': f.popularity,
                'factor': 1.2,
                'modifier': 'sqrt',
            }
        ),
        {
            "function_score": {
                "query": {"match_all": {}},
                "field_value_factor": {
                    "field": "popularity",
                    "factor": 1.2,
                    "modifier": "sqrt"
                }
            }
        }
    )
    self.assert_expression(
        DisMax([Term(f.age, 34), Term(f.age, 35)], boost=1.2, tie_breaker=0.7),
        {
            "dis_max": {
                "tie_breaker": 0.7,
                "boost": 1.2,
                "queries": [
                    {
                        "term": {"age": 34}
                    },
                    {
                        "term": {"age": 35}
                    }
                ]
            }
        }
    )
    self.assert_expression(
        Filtered(
            filter=Range(f.created, gte='now - 1d / d'),
            query=Match(f.tweet, 'full text search')
        ),
        {
            "filtered": {
                "query": {
                    "match": {"tweet": "full text search"}
                },
                "filter": {
                    "range": {"created": {"gte": "now - 1d / d"}}
                }
            }
        }
    )

    # --- Ids / Prefix / MatchAll / Query ------------------------------------
    self.assert_expression(
        Ids(['123456']),
        {
            "ids": {
                "values": ["123456"]
            }
        }
    )
    self.assert_expression(
        Ids(['1', '4', '100'], type="my_type"),
        {
            "ids": {
                "type": "my_type",
                "values": ["1", "4", "100"]
            }
        }
    )
    self.assert_expression(
        Prefix(f.user, 'ki', boost=2.0),
        {
            "prefix": {"user": {"value": "ki", "boost": 2.0}}
        }
    )
    self.assert_expression(
        MatchAll(),
        {"match_all": {}}
    )
    self.assert_expression(
        MatchAll(boost=1.2),
        {
            "match_all": {"boost": 1.2}
        }
    )
    self.assert_expression(
        Query(Match(f.title, 'this that thus')),
        {
            "query": {
                "match": {
                    "title": "this that thus"
                }
            }
        }
    )
    self.assert_expression(
        Query(Match(f.title, 'this that thus'), _cache=True),
        {
            "fquery": {
                "query": {
                    "match": {
                        "title": "this that thus"
                    }
                },
                "_cache": True
            }
        }
    )

    # --- And / Or / Not -----------------------------------------------------
    self.assertRaises(NotImplementedError, BooleanExpression)
    self.assert_expression(
        And(
            Range(f.post_date, from_='2010-03-01', to='2010-04-01'),
            Prefix(f.name.second, 'ba')
        ),
        {
            "and": [
                {
                    "range": {
                        "post_date": {
                            "from": "2010-03-01",
                            "to": "2010-04-01"
                        }
                    }
                },
                {
                    "prefix": {"name.second": "ba"}
                }
            ]
        }
    )
    self.assert_expression(
        And(
            Range(f.post_date, from_='2010-03-01', to='2010-04-01'),
            Prefix(f.name.second, 'ba'),
            _cache=True
        ),
        {
            "and": {
                "filters": [
                    {
                        "range": {
                            "post_date": {
                                "from": "2010-03-01",
                                "to": "2010-04-01"
                            }
                        }
                    },
                    {
                        "prefix": {"name.second": "ba"}
                    }
                ],
                "_cache": True
            }
        }
    )
    self.assert_expression(
        Or(Term(f.name.second, 'banon'), Term(f.name.nick, 'kimchy')),
        {
            "or": [
                {
                    "term": {"name.second": "banon"}
                },
                {
                    "term": {"name.nick": "kimchy"}
                }
            ]
        }
    )
    self.assert_expression(
        And(Or(Term(f.name.nick, 'kimchy'))),
        {
            "term": {"name.nick": "kimchy"}
        }
    )
    self.assert_expression(
        Not(
            Range(f.post_date, from_='2010-03-01', to='2010-04-01'),
        ),
        {
            "not": {
                "range": {
                    "post_date": {
                        "from": "2010-03-01",
                        "to": "2010-04-01"
                    }
                }
            }
        }
    )
    self.assert_expression(
        Not(
            Range(f.post_date, from_='2010-03-01', to='2010-04-01'),
            _cache=True,
        ),
        {
            "not": {
                "filter": {
                    "range": {
                        "post_date": {
                            "from": "2010-03-01",
                            "to": "2010-04-01"
                        }
                    }
                },
                "_cache": True
            }
        }
    )

    # --- Sort ---------------------------------------------------------------
    self.assert_expression(
        Sort(f.post_date),
        "post_date"
    )
    self.assert_expression(
        Sort(f.age, 'desc'),
        {
            "age": "desc"
        }
    )
    self.assert_expression(
        Sort(f.price, 'asc', mode='avg'),
        {
            "price": {
                "order": "asc",
                "mode": "avg"
            }
        }
    )
    self.assert_expression(
        Sort(
            f.offer.price.sort, 'asc', mode='avg',
            nested_filter=Term(f.offer.color, 'blue')
        ),
        {
            "offer.price.sort": {
                "order": "asc",
                "mode": "avg",
                "nested_filter": {
                    "term": {"offer.color": "blue"}
                }
            }
        }
    )

    # --- Span queries -------------------------------------------------------
    self.assert_expression(
        SpanFirst(SpanTerm(f.user, 'kimchy'), end=3),
        {
            "span_first": {
                "match": {
                    "span_term": {"user": "kimchy"}
                },
                "end": 3
            }
        }
    )
    self.assert_expression(
        SpanMulti(Prefix(f.user, 'ki', boost=1.08)),
        {
            "span_multi": {
                "match": {
                    "prefix": {
                        "user": {"value": "ki", "boost": 1.08}
                    }
                }
            }
        }
    )
    self.assert_expression(
        SpanNear(
            [SpanTerm(f.field, 'value1'),
             SpanTerm(f.field, 'value2'),
             SpanTerm(f.field, 'value3')],
            slop=12,
            in_order=False,
            collect_payloads=False,
        ),
        {
            "span_near": {
                "clauses": [
                    {"span_term": {"field": "value1"}},
                    {"span_term": {"field": "value2"}},
                    {"span_term": {"field": "value3"}}
                ],
                "slop": 12,
                "in_order": False,
                "collect_payloads": False
            }
        }
    )
    self.assert_expression(
        SpanNot(
            SpanTerm(f.field1, 'hoya'),
            SpanNear([SpanTerm(f.field1, 'la'), SpanTerm(f.field1, 'hoya')], slop=0, in_order=True),
        ),
        {
            "span_not": {
                "include": {
                    "span_term": {"field1": "hoya"}
                },
                "exclude": {
                    "span_near": {
                        "clauses": [
                            {"span_term": {"field1": "la"}},
                            {"span_term": {"field1": "hoya"}}
                        ],
                        "slop": 0,
                        "in_order": True
                    }
                }
            }
        }
    )
    self.assert_expression(
        SpanOr(
            [
                SpanTerm(f.field, 'value1'),
                SpanTerm(f.field, 'value2'),
                SpanTerm(f.field, 'value3')
            ],
            boost=2,
        ),
        {
            "span_or": {
                "clauses": [
                    {"span_term": {"field": "value1"}},
                    {"span_term": {"field": "value2"}},
                    {"span_term": {"field": "value3"}}
                ],
                "boost": 2
            }
        }
    )

    # --- Limit / Nested / HasParent / HasChild ------------------------------
    self.assert_expression(
        Limit(1000),
        {
            "limit": {
                "value": 1000
            }
        }
    )
    e = Nested(
        self.index.movie.stars,
        Match(self.index.movie.stars.full_name, 'Will Smith'),
        score_mode='max',
    )
    self.assert_expression(
        e,
        {
            "nested": {
                "path": "stars",
                "query": {
                    "match": {
                        "stars.full_name": "Will Smith"
                    }
                },
                "score_mode": "max"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        {self.index.movie}
    )

    e = HasParent(
        self.index.blog.tag == 'something',
        parent_type=self.index.blog,
        score_mode='score',
    )
    self.assert_expression(
        e,
        {
            "has_parent": {
                "parent_type": "blog",
                "query": {
                    "term": {
                        "tag": "something"
                    }
                },
                "score_mode": "score"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        set()
    )

    # Same expectation without an explicit ``parent_type``.
    e = HasParent(
        self.index.blog.tag == 'something',
        score_mode='score',
    )
    self.assert_expression(
        e,
        {
            "has_parent": {
                "parent_type": "blog",
                "query": {
                    "term": {
                        "tag": "something"
                    }
                },
                "score_mode": "score"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        set()
    )

    e = HasChild(
        self.index.blog_tag.tag == 'something',
        type=self.index.blog_tag,
        score_mode='sum',
    )
    self.assert_expression(
        e,
        {
            "has_child": {
                "type": "blog_tag",
                "query": {
                    "term": {
                        "tag": "something"
                    }
                },
                "score_mode": "sum"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        set()
    )

    # Same expectation without an explicit ``type``.
    e = HasChild(
        self.index.blog_tag.tag == 'something',
        score_mode='sum',
    )
    self.assert_expression(
        e,
        {
            "has_child": {
                "type": "blog_tag",
                "query": {
                    "term": {
                        "tag": "something"
                    }
                },
                "score_mode": "sum"
            }
        }
    )
    self.assertEqual(
        e._collect_doc_classes(),
        set()
    )
def _get_filter_expression(self, attr_id: int, values: ParamValues) -> t.Optional[Expression]:
    """Build the range expression for the ``gte``/``lte`` bounds in ``values``.

    Each supplied bound is merged with ``attr_id`` through
    ``merge_attr_value_float`` before being used in a ``Range`` on
    ``self.field``.  Returns ``None`` when neither bound is given.

    NOTE(review): bounds are swapped for negative values and ranges are
    split at zero, presumably because merged negative floats sort in
    reverse order; confirm against ``merge_attr_value_float``.
    """
    gte = self._parse_last_value(values, 'gte')
    gte_value = merge_attr_value_float(attr_id, gte) if gte is not None else None
    lte = self._parse_last_value(values, 'lte')
    lte_value = merge_attr_value_float(attr_id, lte) if lte is not None else None

    if gte is None and lte is None:
        return None

    if lte is None:
        # Lower bound only.
        if gte >= 0.0:
            return Range(self.field, gte=gte_value, lte=self._plus_inf(attr_id))
        return Bool.should(
            Range(self.field, gte=self._minus_zero(attr_id), lte=gte_value),
            Range(self.field, gte=self._plus_zero(attr_id), lte=self._plus_inf(attr_id)),
        )

    if gte is None:
        # Upper bound only.
        if lte < 0.0:
            return Range(self.field, gte=lte_value, lte=self._minus_inf(attr_id))
        return Bool.should(
            Range(self.field, gte=self._plus_zero(attr_id), lte=lte_value),
            Range(self.field, gte=self._minus_zero(attr_id), lte=self._minus_inf(attr_id)),
        )

    # Both bounds supplied.
    if gte >= 0.0 and lte >= 0.0:
        return Range(self.field, gte=gte_value, lte=lte_value)
    if gte < 0.0 and lte < 0.0:
        return Range(self.field, gte=lte_value, lte=gte_value)
    if gte < 0.0 and lte >= 0:
        return Bool.should(
            Range(self.field, gte=self._minus_zero(attr_id), lte=gte_value),
            Range(self.field, gte=self._plus_zero(attr_id), lte=lte_value),
        )
    # Remaining combinations (e.g. non-negative ``gte`` with negative ``lte``).
    return Bool.must(
        Range(self.field, gte=gte_value, lte=self._plus_inf(attr_id)),
        Range(self.field, gte=lte_value, lte=self._minus_inf(attr_id)),
    )