def term_facet(host='localhost:9200', terms=['bibleverse'], _type='habakkuk', date_filter=[], size=10): ret = [] conn = ES(host) q = MatchAllQuery() if date_filter: start,end = date_filter q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False))) q = q.search(size=0) for term in terms: q.facet.add_term_facet(term,order='count',size=size) print json.dumps(json.loads(q.to_search_json()),indent=2) resultset = conn.search(query=q, indices=_type+'-*', doc_types=[_type]) for facet in resultset.facets: print "Total",facet,resultset.facets[facet]['total'] for row in resultset.facets[facet]['terms']: print "\t",row['term'],row['count'] ret.append((facet,row['term'])) return ret
def facets(host='localhost:9200', facet_terms=None, _type='habakkuk', date_filter=None, size=10):
    """Run term facets and return them keyed by facet name.

    :param host: ES host:port string.
    :param facet_terms: fields to facet on; defaults to ['bibleverse'].
    :param _type: doc type, also used as the index prefix (searches ``<_type>-*``).
    :param date_filter: optional (start, end) pair restricting
        ``created_at_date``; each bound is serialized with ``isoformat()``,
        upper bound exclusive.
    :param size: maximum number of facet terms returned per field.
    :returns: dict mapping facet name to a list of
        ``{"value": term, "count": n}`` dicts.
    """
    # None sentinels replace the original mutable default arguments
    # (facet_terms=['bibleverse'], date_filter=[]) — same observable behavior.
    if facet_terms is None:
        facet_terms = ['bibleverse']
    ret = {}
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange(
            'created_at_date', start.isoformat(), end.isoformat(),
            include_upper=False)))
    q = q.search(size=0)  # facets only; suppress document hits
    for term in facet_terms:
        q.facet.add_term_facet(term, order='count', size=size)
    es_logger.info(q.serialize())
    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    for facet in resultset.facets:
        ret[facet] = []
        for row in resultset.facets[facet]['terms']:
            ret[facet].append({"value": row['term'], "count": row['count']})
    logger.debug("facets return|'%s'" % json.dumps(ret))
    return ret
def dump(start, end, backupdir, eshost):
    """Scan-dump 'habakkuk' docs in [start, end) to a gzipped JSON-lines file.

    Streams matching docs to a staging file (/tmp/out.json), then gzips it
    into ``<backupdir>/habakkuk-<start date>.json.gz``. Progress goes to
    stderr.

    Fixes over the original: the staging file is closed on the empty-result
    early return, ``f_in`` is closed after gzipping, and the duplicated
    ``f_out.close()`` is gone.
    """
    conn = ES(eshost)
    out = file('/tmp/out.json', 'w')
    _type = 'habakkuk'
    q = MatchAllQuery()
    q = FilteredQuery(q, RangeFilter(qrange=ESRange(
        'created_at_date', start, end, include_upper=False)))
    q = q.search()
    resultset = conn.search(query=q, indices=_type + "-*",
                            doc_types=[_type], scan=True)
    cnt = 0
    if not resultset.total:
        sys.stderr.write("no data for %s - %s\n" % (start, end))
        out.close()  # don't leak the staging file handle
        return
    try:
        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        while True:
            r = resultset.next()
            cnt += 1
            out.write(json.dumps(r) + '\n')
    except StopIteration:
        # scan cursor exhausted — all docs written
        pass
    out.close()
    # gzip the staging file into the backup directory
    ext = datetime.strftime(start, '%Y-%m-%d')
    backup = os.path.join(backupdir, "habakkuk-%s.json.gz" % ext)
    f_in = open(out.name, 'rb')
    f_out = gzip.open(backup, 'wb')
    try:
        f_out.writelines(f_in)
    finally:
        f_out.close()  # original closed f_out twice
        f_in.close()   # original never closed f_in
    sys.stderr.write("Created %s\n" % backup)
def test_facet_filter_is_serialized_correctly(self):
    """A facet_filter attached to a TermFacet must survive serialization."""
    search = MatchAllQuery().search(size=0)
    not_reviewed = BoolFilter(must_not=TermQuery(field="reviewed", value=True))
    search.facet.add(TermFacet(field="topic", facet_filter=not_reviewed))
    body = search.serialize()
    self.assertTrue(body["facets"]["topic"]["facet_filter"]["bool"])
def test_nested_filter(self):
    # Both prefixes loose (orgid 'abc*', role '11*'): 3 parent docs match.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'shares',
            BoolQuery(must=[
                PrefixQuery('shares.orgid', 'abc'),
                PrefixQuery('shares.role', '11')
            ])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    print ', '.join([r['body'] for r in resultset])
    # Narrower orgid ('abc.de*') with role '111*': only 1 parent matches.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'shares',
            BoolQuery(must=[
                PrefixQuery('shares.orgid', 'abc.de'),
                PrefixQuery('shares.role', '111')
            ])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 1)
    print ', '.join([r['body'] for r in resultset])
    # orgid 'abc.de.1*' with role '11*': no single nested object satisfies
    # both prefixes, so nothing matches.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'shares',
            BoolQuery(must=[
                PrefixQuery('shares.orgid', 'abc.de.1'),
                PrefixQuery('shares.role', '11')
            ])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 0)
    print ', '.join([r['body'] for r in resultset])
    # Loose orgid ('abc*') with role '111*': 2 parents match.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'shares',
            BoolQuery(must=[
                PrefixQuery('shares.orgid', 'abc'),
                PrefixQuery('shares.role', '111')
            ])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    print ', '.join([r['body'] for r in resultset])
    print
def test_nested_agg(self):
    """A nested agg on 'resellers' reports 4 nested docs across 3 hits."""
    search = MatchAllQuery().search()
    search.agg.add(NestedAgg(name='nested', path='resellers'))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.nested, {u'doc_count': 4})
def test_terms_facet(self):
    """Term facet on 'tag' yields foo:2, bar:1 over the 3 indexed docs."""
    search = MatchAllQuery().search()
    search.facet.add_term_facet('tag')
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEquals(results.total, 3)
    expected = [{u'count': 2, u'term': u'foo'},
                {u'count': 1, u'term': u'bar'}]
    self.assertEquals(results.facets.tag.terms, expected)
def test_terms_facet(self):
    """Term facet via the raw-dict result API: foo:2, bar:1 over 3 docs."""
    search = MatchAllQuery().search()
    search.facet.add_term_facet('tag')
    response = self.conn.search(query=search, indexes=["test-index"],
                                doc_types=["test-type"])
    self.assertEquals(response['hits']['total'], 3)
    expected = [{u'count': 2, u'term': u'foo'},
                {u'count': 1, u'term': u'bar'}]
    self.assertEquals(response['facets']['tag']['terms'], expected)
def test_facet_filter_is_serialized_correctly(self):
    """The serialized facet body must carry the attached bool facet_filter."""
    search = MatchAllQuery().search(size=0)
    not_reviewed = BoolFilter(must_not=TermQuery(field='reviewed', value=True))
    search.facet.add(TermFacet(field='topic', facet_filter=not_reviewed))
    body = search.serialize()
    self.assertTrue(body['facets']['topic']['facet_filter']['bool'])
def test_terms_facet_filter(self):
    """Filtering on tag=foo leaves 2 docs; the facet then only sees foo."""
    filtered = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
    search = filtered.search()
    search.facet.add_term_facet('tag')
    results = self.conn.search(query=search, indices=["test-index"],
                               doc_types=["test-type"])
    self.assertEquals(results.total, 2)
    self.assertEquals(results.facets['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
def test_nested_agg(self):
    """Nested agg on "resellers": 4 nested docs behind the 3 hits."""
    search = MatchAllQuery().search()
    search.agg.add(NestedAgg(name="nested", path="resellers"))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.nested, {u"doc_count": 4})
def test_max_agg(self):
    """Max of 'position' across the three docs is 3."""
    search = MatchAllQuery().search()
    search.agg.add(MaxAgg(name='max', field='position'))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.max, {u'value': 3})
def test_missing_agg(self):
    """Exactly one of the three docs lacks the 'integer' field."""
    search = MatchAllQuery().search()
    search.agg.add(MissingAgg(name='missing', field='integer'))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.missing, {u'doc_count': 1})
def test_min_agg(self):
    """Min of "position" across the three docs is 1."""
    search = MatchAllQuery().search()
    search.agg.add(MinAgg(name="min", field="position"))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.min, {u'value': 1})
def test_missing_agg(self):
    """One of the three docs has no "integer" field."""
    search = MatchAllQuery().search()
    search.agg.add(MissingAgg(name="missing", field="integer"))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.missing, {u"doc_count": 1})
def test_iterator(self):
    # With size=20 the resultset iterates exactly the 20 requested hits.
    resultset = self.conn.search(
        Search(MatchAllQuery(), size=20), self.index_name, self.document_type)
    self.assertEqual(len([p for p in resultset]), 20)
    # With size=10, slicing within the page gives 10 hits; indexing past
    # the page still works (the resultset fetches more behind the scenes —
    # presumably, given index 10 resolves; total reflects all 1000 docs).
    resultset = self.conn.search(
        Search(MatchAllQuery(), size=10), self.index_name, self.document_type)
    self.assertEqual(len([p for p in resultset[:10]]), 10)
    self.assertEqual(resultset[10].uuid, "11111")
    self.assertEqual(resultset.total, 1000)
def test_max_agg(self):
    """Max of "position" across the three docs is 3."""
    search = MatchAllQuery().search()
    search.agg.add(MaxAgg(name="max", field="position"))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.max, {u"value": 3})
def test_GeoDistanceFilter(self):
    """A 200km distance filter matches one pin, whether the point is given
    as a {lat, lon} dict or as a [lon, lat] list."""
    for point in ({"lat": 40, "lon": 70}, [70, 40]):
        geo = GeoDistanceFilter("pin.location", point, "200km")
        query = FilteredQuery(MatchAllQuery(), geo)
        results = self.conn.search(query=query, indices=["test-mindex"])
        self.assertEqual(results.total, 1)
def test_GeoBoundingBoxFilter(self):
    """A bounding box around the pin matches one doc, for both the
    {lat, lon} dict corners and the [lon, lat] list corners."""
    box = GeoBoundingBoxFilter("pin.location",
                               location_tl={"lat": 40.717, "lon": 70.99},
                               location_br={"lat": 40.03, "lon": 72.0})
    query = FilteredQuery(MatchAllQuery(), box)
    results = self.conn.search(query=query, indices=["test-mindex"])
    self.assertEquals(results.total, 1)
    box = GeoBoundingBoxFilter("pin.location", [70.99, 40.717], [74.1, 40.03])
    query = FilteredQuery(MatchAllQuery(), box)
    results = self.conn.search(query=query, indices=["test-mindex"])
    self.assertEquals(results.total, 1)
def get_top_authors():
    """Return author objects for the most frequent 'author' facet terms."""
    search = MatchAllQuery().search()
    search.facet.add_term_facet('author')
    es = get_connection()
    facets = es.search(search, ELASTICSEARCH_INDEX, 'post').facets
    return [get_author(id=entry['term'])
            for entry in facets['author']['terms']]
def test_reverse_nested_agg(self):
    """A reverse-nested sub-agg climbs from 4 nested docs back to 2 roots."""
    search = MatchAllQuery().search()
    reverse = ReverseNestedAgg(name='reverse', field='id')
    search.agg.add(NestedAgg(name='nested', path='resellers',
                             sub_aggs=[reverse]))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.nested['doc_count'], 4)
    self.assertEqual(results.aggs.nested.reverse, {u'doc_count': 2})
def test_reverse_nested_agg(self):
    """Reverse-nested sub-agg: 4 nested docs map back to 2 root docs."""
    search = MatchAllQuery().search()
    reverse = ReverseNestedAgg(name="reverse", field="id")
    search.agg.add(NestedAgg(name="nested", path="resellers",
                             sub_aggs=[reverse]))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEqual(results.total, 3)
    self.assertEqual(results.aggs.nested["doc_count"], 4)
    self.assertEqual(results.aggs.nested.reverse, {u"doc_count": 2})
def test_date_facet_filter(self):
    """April-2011 range filter plus a monthly histogram facet: 2 docs,
    one histogram bucket."""
    date_range = ESRange('date', datetime.date(2011, 4, 1),
                         datetime.date(2011, 5, 1), include_upper=False)
    search = FilteredQuery(MatchAllQuery(),
                           RangeFilter(qrange=date_range)).search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEquals(results.total, 2)
    self.assertEquals(results.facets['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000}])
def test_terms_facet_filter(self):
    """tag=foo filter via the raw-dict API: 2 hits, facet reports only foo."""
    search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
    search.facet.add_term_facet('tag')
    response = self.conn.search(query=search, indexes=["test-index"],
                                doc_types=["test-type"])
    self.assertEquals(response['hits']['total'], 2)
    self.assertEquals(response['facets']['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
def test_date_facet_filter(self):
    """Range-filter April 2011 and facet by month: one bucket of 2 docs."""
    date_range = ESRange("date", datetime.date(2011, 4, 1),
                         datetime.date(2011, 5, 1), include_upper=False)
    search = FilteredQuery(MatchAllQuery(),
                           RangeFilter(qrange=date_range)).search()
    search.facet.facets.append(
        DateHistogramFacet("date_facet", field="date", interval="month"))
    results = self.conn.search(query=search, indices=self.index_name,
                               doc_types=[self.document_type])
    self.assertEquals(results.total, 2)
    self.assertEquals(results.facets["date_facet"]["entries"],
                      [{u"count": 2, u"time": 1301616000000}])
def test_date_facet(self):
    """Monthly date histogram over 3 docs: 2 in Apr 2011, 1 in May 2011.

    Fixes: uses ``utcfromtimestamp`` so the epoch-to-date checks do not
    depend on the machine's local timezone (the facet times are UTC
    midnights), and plain decimal date literals instead of the Py2-only
    leading-zero form (04, 01), which is a syntax error on Python 3.
    """
    q = MatchAllQuery()
    q = q.search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    result = self.conn.search(query=q, indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEquals(result['hits']['total'], 3)
    self.assertEquals(result['facets']['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram over 3 docs (2 in Apr 2011, 1 in May 2011).

    Fix: plain decimal date literals replace the Py2-only leading-zero
    form (04, 01), which is a syntax error on Python 3. Already uses the
    timezone-independent ``utcfromtimestamp``.
    """
    q = MatchAllQuery()
    q = q.search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram over 3 docs (2 in Apr 2011, 1 in May 2011).

    Fixes: ``utcfromtimestamp`` replaces the local-timezone-dependent
    ``fromtimestamp`` (the facet times are UTC midnights, so the old check
    failed in timezones behind UTC) — consistent with the sibling version
    of this test — and plain decimal date literals replace the Py2-only
    leading-zero form (04, 01).
    """
    q = MatchAllQuery()
    q = q.search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_GeoPolygonFilter(self):
    """A triangular polygon contains one pin; vertices may be given as
    {lat, lon} dicts or as [lon, lat] lists."""
    polygon = GeoPolygonFilter("pin.location",
                               [{"lat": 50, "lon": -30},
                                {"lat": 30, "lon": -80},
                                {"lat": 80, "lon": -90}])
    query = FilteredQuery(MatchAllQuery(), polygon)
    results = self.conn.search(query=query, indices=["test-mindex"])
    self.assertEquals(results.total, 1)
    polygon = GeoPolygonFilter("pin.location",
                               [[-30, 50], [-80, 30], [-90, 80]])
    query = FilteredQuery(MatchAllQuery(), polygon)
    results = self.conn.search(query=query, indices=["test-mindex"])
    self.assertEquals(results.total, 1)
def create_query(self, sql):
    """Translate a simple query-description dict into a pyes FilteredQuery.

    Recognized keys (others ignored): ``termquery`` — a list of one-entry
    ``{field: value}`` dicts, each becoming a TermFilter; ``geodistancefilter``
    — a dict with 'field', 'fieldvalue', 'distance' becoming a
    GeoDistanceFilter (arc, km); ``optype`` — 'and'/'or' combinator, any
    other value falls back to AND.

    Appends to ``self._query_field`` / ``self._filters`` and sets
    ``self._filtertype`` as side effects.

    :returns: FilteredQuery wrapping a MatchAllQuery with the combined filter.
    """
    if "termquery" in sql:
        for term in sql['termquery']:
            # list(term)[0] works on both Py2 and Py3; the original
            # Py2-only term.keys()[0] fails on Python 3.
            field = list(term)[0]  # each entry is a single {field: value} dict
            self._query_field.append(field)
            self._filters.append(TermFilter(field, term[field]))
    if "geodistancefilter" in sql:
        geoterm = sql['geodistancefilter']
        geofieldname = geoterm['field']
        self._filters.append(GeoDistanceFilter(
            geofieldname, geoterm['fieldvalue'], geoterm['distance'],
            'arc', 'km'))
        self._query_field.append(geofieldname)
    if "optype" in sql:
        # if/elif replaces the original if/if-else, which built an ANDFilter
        # for 'and' and then immediately rebuilt it in the dangling else;
        # net behavior is unchanged. Unknown optypes still default to AND.
        if sql['optype'].lower() == 'or':
            self._filtertype = ORFilter(self._filters)
        else:
            self._filtertype = ANDFilter(self._filters)
    # NOTE(review): if 'optype' is absent, self._filtertype must already be
    # set elsewhere, as in the original — confirm against callers.
    return FilteredQuery(MatchAllQuery(), self._filtertype)
def get_query(s):
    """Parse a search string into a pyes query.

    ``field:value`` pairs (values may be double-quoted; a trailing '*'
    produces a WildcardQuery, otherwise a TermQuery) are extracted first;
    whatever text remains is matched against ``_all`` with an AND TextQuery,
    or becomes a MatchAllQuery if it is exactly '*'. A single clause is
    returned as-is; multiple clauses are ANDed inside a BoolQuery.
    """
    import re
    queries = []
    free_parts = []
    pos = 0
    for mat in re.finditer(r'(?P<name>\S+):(?P<value>"[^"]+"|\S+)\s*', s):
        free_parts.append(s[pos:mat.start()])
        pos = mat.end()
        parsed = mat.groupdict()
        value = parsed['value'].strip('"')
        if value.endswith("*"):
            queries.append(WildcardQuery(field=parsed['name'], value=value))
        else:
            queries.append(TermQuery(field=parsed['name'], value=value))
    free_parts.append(s[pos:])
    freetext = "".join(free_parts).strip()
    if freetext:
        if freetext == '*':
            queries.append(MatchAllQuery())
        else:
            queries.append(TextQuery("_all", freetext, operator='and'))
    if len(queries) == 1:
        return queries[0]
    combined = BoolQuery()
    for query in queries:
        combined.add_must(query)
    return combined
def test_iterator_offset(self):
    # Query for a block of 10, starting at position 10 (start/size passed
    # both in the Search body and as search() kwargs):
    resultset = self.conn.search(
        Search(MatchAllQuery(), start=10, size=10,
               sort={'position': {'order': 'asc'}}),
        self.index_name, self.document_type, start=10, size=10)
    # Ensure that there are 1000 results (len reports the full match count,
    # not the page size):
    self.assertEqual(len(resultset), 1000)
    # Now check that we actually have records 10-19, rather than 0-9:
    position = 0
    for r in resultset:
        self.assertEqual(r.position, position + 10)
        position += 1
def test_sorting_by_geolocation(self):
    """Sorting by distance from (lat=1, lon=1) orders docs 1, 2, 3."""
    search = Search(MatchAllQuery())
    search.sort.add(GeoSortOrder(field='location', lat=1, lon=1))
    results = self.conn.search(search, indices=self.index_name,
                               doc_types=[self.document_type])
    hit_ids = [hit['_id'] for hit in results.hits]
    self.assertEqual(hit_ids, ['1', '2', '3'])
def test_sorting_by_script(self):
    """Script sort by 1/foo reverses the natural order: 3, 2, 1."""
    search = Search(MatchAllQuery())
    search.sort.add(ScriptSortOrder("1.0/doc['foo'].value", type='number'))
    results = self.conn.search(search, indices=self.index_name,
                               doc_types=[self.document_type])
    hit_ids = [hit['_id'] for hit in results.hits]
    self.assertEqual(hit_ids, ['3', '2', '1'])
def test_sorting_by_foo(self):
    """Descending sort on 'foo' yields docs 3, 2, 1."""
    search = Search(MatchAllQuery())
    search.sort.add(SortOrder('foo', order='desc'))
    results = self.conn.search(search, indices=self.index_name,
                               doc_types=[self.document_type])
    hit_ids = [hit['_id'] for hit in results.hits]
    self.assertEqual(hit_ids, ['3', '2', '1'])
def records(self):
    """Return an iterator over all documents via an ES scan search.

    :raises RuntimeError: if the stream has not been initialized.
    """
    if not self.connection:
        raise RuntimeError("Stream is not initialized")
    from pyes.query import MatchAllQuery
    scan_results = self.connection.search(
        MatchAllQuery(), search_type="scan", timeout="5m", size="200")
    return ESRecordIterator(scan_results, self.expand)
def dump_docs(fp, conn, index_name, doc_type, scroll='5m', encoding='utf8'):
    """Stream every document of one index/type to fp as JSON lines.

    Uses an ES scan search so the whole index is walked without paging
    limits; one JSON document per output line.
    """
    matches = conn.search(MatchAllQuery(), indices=[index_name],
                          doc_types=[doc_type], scan=True, scroll=scroll)
    for doc in matches:
        fp.write(json.dumps(doc, encoding=encoding))
        fp.write('\n')
def rows(self):
    """Return a row iterator over all documents via an ES scan search.

    :raises RuntimeError: if the stream has not been initialized.
    """
    if not self.connection:
        raise RuntimeError("Stream is not initialized")
    from pyes.query import MatchAllQuery
    field_list = self.field_names
    scan_results = self.connection.search(
        MatchAllQuery(), search_type="scan", timeout="5m", size="200")
    return ESRowIterator(scan_results, field_list)
def test_nested_filter(self):
    # A term filter on _all finds the nested value in 2 docs.
    q = FilteredQuery(MatchAllQuery(), TermFilter('_all', 'n_value1_1'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 2)
    # A plain (non-nested) term filter on the nested path matches nothing.
    q = FilteredQuery(MatchAllQuery(),
                      TermFilter('nested1.n_field1', 'n_value1_1'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 0)
    # NOTE(review): exact duplicate of the previous query and assertion.
    q = FilteredQuery(MatchAllQuery(),
                      TermFilter('nested1.n_field1', 'n_value1_1'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 0)
    # Wrapping the same term in a NestedFilter matches 2 parent docs.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'nested1',
            BoolQuery(must=[TermQuery('nested1.n_field1', 'n_value1_1')])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 2)
    # Requiring both terms within one nested object narrows to 1 parent.
    q = FilteredQuery(
        MatchAllQuery(),
        NestedFilter(
            'nested1',
            BoolQuery(must=[
                TermQuery('nested1.n_field1', 'n_value1_1'),
                TermQuery('nested1.n_field2', 'n_value2_1')
            ])))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 1)
def add_filters(filters):
    """Build a query that ANDs a TermFilter per (field, term) pair.

    :param filters: iterable of (field, term) pairs; may be empty.
    :returns: a FilteredQuery(MatchAllQuery, ANDFilter(...)) when any pairs
        were given, otherwise a bare MatchAllQuery.

    Bug fix: the original did ``q = FilteredQuery(q, f)`` with ``q`` never
    assigned on that branch, raising UnboundLocalError whenever ``filters``
    was non-empty; the filter must wrap a MatchAllQuery.
    """
    flist = [TermFilter(field, term) for field, term in filters]
    if flist:
        q = FilteredQuery(MatchAllQuery(), ANDFilter(flist))
    else:
        q = MatchAllQuery()
    return q
def test_date_facet(self):
    """Monthly histogram facet via the raw-dict API: Apr 2011 has 2 docs,
    May 2011 has 1.

    Fixes: ``utcfromtimestamp`` replaces the local-timezone-dependent
    ``fromtimestamp`` (facet times are UTC midnights), and plain decimal
    date literals replace the Py2-only leading-zero form (04, 01).
    """
    q = MatchAllQuery()
    q = q.search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    result = self.conn.search(query=q, indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEquals(result['hits']['total'], 3)
    self.assertEquals(result['facets']['date_facet']['entries'], [{
        u'count': 2,
        u'time': 1301616000000
    }, {
        u'count': 1,
        u'time': 1304208000000
    }])
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def dump_topics(backupdir, eshost, _type, indices="topics-all"):
    """Scan-dump every doc of ``_type`` to a gzipped JSON-lines backup.

    Streams docs (with their ``_id`` injected into the source body) to a
    staging file (/tmp/out.json), then gzips it into
    ``<backupdir>/topics.<_type>.json.gz``. Progress goes to stderr.

    Fixes over the original: the staging file is closed on the empty-result
    early return, ``f_in`` is closed after gzipping, and the duplicated
    ``f_out.close()`` is gone.
    """
    conn = ES(eshost)
    out = file('/tmp/out.json', 'w')
    q = MatchAllQuery()
    q = q.search()
    resultset = conn.search(query=q, indices=indices,
                            doc_types=[_type], scan=True)
    cnt = 0
    if not resultset.total:
        sys.stderr.write("no data\n")
        out.close()  # don't leak the staging file handle
        return
    try:
        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        while True:
            r = resultset.next()
            # keep the document id alongside the source body
            r['_id'] = r._meta.id
            cnt += 1
            out.write(json.dumps(r) + '\n')
    except StopIteration:
        # scan cursor exhausted — all docs written
        pass
    out.close()
    # gzip the staging file into the backup directory
    backup = os.path.join(backupdir, "topics.{}.json.gz".format(_type))
    f_in = open(out.name, 'rb')
    f_out = gzip.open(backup, 'wb')
    try:
        f_out.writelines(f_in)
    finally:
        f_out.close()  # original closed f_out twice
        f_in.close()   # original never closed f_in
    sys.stderr.write("Created %s\n" % backup)
def test_terms_facet(self):
    # Facet on "tag": foo appears twice, bar once, across the 3 docs.
    q = MatchAllQuery()
    q = q.search()
    q.facet.add_term_facet("tag")
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.tag.terms,
                      [{u"count": 2, u"term": u"foo"},
                       {u"count": 1, u"term": u"bar"}])
    # Two searches built identically must compare equal...
    q2 = MatchAllQuery()
    q2 = q2.search()
    q2.facet.add_term_facet("tag")
    q3 = MatchAllQuery()
    q3 = q3.search()
    q3.facet.add_term_facet("tag")
    self.assertEquals(q2, q3)
    # ...and differ once the facet field differs.
    q4 = MatchAllQuery()
    q4 = q4.search()
    q4.facet.add_term_facet("bag")
    self.assertNotEquals(q2, q4)
def test_terms_facet_filter(self):
    # Filter to tag=foo: 2 docs remain and the facet only sees foo.
    q = MatchAllQuery()
    q = FilteredQuery(q, TermFilter('tag', 'foo'))
    q = q.search()
    q.facet.add_term_facet('tag')
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    # Facet data is reachable both by key and by attribute access.
    self.assertEquals(resultset.facets['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
    self.assertEquals(resultset.facets.tag.terms,
                      [{u'count': 2, u'term': u'foo'}])
    # Identically-built filtered searches must compare equal...
    q2 = MatchAllQuery()
    q2 = FilteredQuery(q2, TermFilter('tag', 'foo'))
    q2 = q2.search()
    q2.facet.add_term_facet('tag')
    q3 = MatchAllQuery()
    q3 = FilteredQuery(q3, TermFilter('tag', 'foo'))
    q3 = q3.search()
    q3.facet.add_term_facet('tag')
    self.assertEquals(q2, q3)
    # ...and differ once the facet field differs.
    q4 = MatchAllQuery()
    q4 = FilteredQuery(q4, TermFilter('tag', 'foo'))
    q4 = q4.search()
    q4.facet.add_term_facet('bag')
    self.assertNotEquals(q3, q4)
def test_facet_filter_is_serialized_correctly(self):
    """Serialization must include the facet's bool facet_filter clause."""
    search = MatchAllQuery().search(size=0)
    not_reviewed = BoolFilter(must_not=TermQuery(field='reviewed', value=True))
    search.facet.add(TermFacet(field='topic', facet_filter=not_reviewed))
    body = search.serialize()
    self.assertTrue(body['facets']['topic']['facet_filter']['bool'])
def test_terms_facet_filter(self):
    # Filter to tag=foo: 2 docs remain and the facet only sees foo.
    q = MatchAllQuery()
    q = FilteredQuery(q, TermFilter("tag", "foo"))
    q = q.search()
    q.facet.add_term_facet("tag")
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    # Facet data is reachable both by key and by attribute access.
    self.assertEquals(resultset.facets["tag"]["terms"],
                      [{u"count": 2, u"term": u"foo"}])
    self.assertEquals(resultset.facets.tag.terms,
                      [{u"count": 2, u"term": u"foo"}])
    # Identically-built filtered searches must compare equal...
    q2 = MatchAllQuery()
    q2 = FilteredQuery(q2, TermFilter("tag", "foo"))
    q2 = q2.search()
    q2.facet.add_term_facet("tag")
    q3 = MatchAllQuery()
    q3 = FilteredQuery(q3, TermFilter("tag", "foo"))
    q3 = q3.search()
    q3.facet.add_term_facet("tag")
    self.assertEquals(q2, q3)
    # ...and differ once the facet field differs.
    q4 = MatchAllQuery()
    q4 = FilteredQuery(q4, TermFilter("tag", "foo"))
    q4 = q4.search()
    q4.facet.add_term_facet("bag")
    self.assertNotEquals(q3, q4)
import json
from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Build a match-all search carrying a two-level terms aggregation:
# clients, each broken down by domain. size=20000 raises the per-bucket
# term limit far above any expected cardinality; size=0 on the search
# suppresses document hits so only aggregations come back.
match_all = MatchAllQuery()
sub_domain_agg = TermsAgg('domain_agg', field='json_data.etp_domain_id', size=20000)
client_agg = TermsAgg('client_agg', field='json_data.etp_client_id', sub_aggs=[sub_domain_agg], size=20000)
search_query = Search(query=match_all, size=0)
search_query.agg.add(client_agg)
# Print the request body for inspection; the search is never executed here.
print(json.dumps(search_query.serialize(), indent=2))
def test_terms_facet(self):
    # Facet on 'tag': foo appears twice, bar once, across the 3 docs.
    q = MatchAllQuery()
    q = q.search()
    q.facet.add_term_facet('tag')
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.tag.terms, [{
        u'count': 2,
        u'term': u'foo'
    }, {
        u'count': 1,
        u'term': u'bar'
    }])
    # Two searches built identically must compare equal...
    q2 = MatchAllQuery()
    q2 = q2.search()
    q2.facet.add_term_facet('tag')
    q3 = MatchAllQuery()
    q3 = q3.search()
    q3.facet.add_term_facet('tag')
    self.assertEquals(q2, q3)
    # ...and differ once the facet field differs.
    q4 = MatchAllQuery()
    q4 = q4.search()
    q4.facet.add_term_facet('bag')
    self.assertNotEquals(q2, q4)
def test_terms_facet_filter(self):
    # Filter to tag=foo: 2 docs remain and the facet only sees foo.
    q = MatchAllQuery()
    q = FilteredQuery(q, TermFilter('tag', 'foo'))
    q = q.search()
    q.facet.add_term_facet('tag')
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    # Facet data is reachable both by key and by attribute access.
    self.assertEquals(resultset.facets['tag']['terms'], [{
        u'count': 2,
        u'term': u'foo'
    }])
    self.assertEquals(resultset.facets.tag.terms, [{
        u'count': 2,
        u'term': u'foo'
    }])
    # Identically-built filtered searches must compare equal...
    q2 = MatchAllQuery()
    q2 = FilteredQuery(q2, TermFilter('tag', 'foo'))
    q2 = q2.search()
    q2.facet.add_term_facet('tag')
    q3 = MatchAllQuery()
    q3 = FilteredQuery(q3, TermFilter('tag', 'foo'))
    q3 = q3.search()
    q3.facet.add_term_facet('tag')
    self.assertEquals(q2, q3)
    # ...and differ once the facet field differs.
    q4 = MatchAllQuery()
    q4 = FilteredQuery(q4, TermFilter('tag', 'foo'))
    q4 = q4.search()
    q4.facet.add_term_facet('bag')
    self.assertNotEquals(q3, q4)