def test_terms_facet_filter(self):
    """Term facet combined with a term filter: results plus query equality."""
    def build(facet_field):
        # Filtered match-all search with a term facet on ``facet_field``.
        search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
        search.facet.add_term_facet(facet_field)
        return search

    q = build('tag')
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    expected_terms = [{u'count': 2, u'term': u'foo'}]
    # Facets are reachable both by item access and attribute access.
    self.assertEquals(resultset.facets['tag']['terms'], expected_terms)
    self.assertEquals(resultset.facets.tag.terms, expected_terms)

    # Identically constructed searches must compare equal...
    self.assertEquals(build('tag'), build('tag'))
    # ...while a facet on a different field must not.
    self.assertNotEquals(build('tag'), build('bag'))
def test_terms_facet(self):
    """Plain term facet over all documents, plus search equality checks."""
    def build(facet_field):
        # Match-all search carrying a term facet on ``facet_field``.
        search = MatchAllQuery().search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(query=build('tag'), indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.tag.terms,
                      [{u'count': 2, u'term': u'foo'},
                       {u'count': 1, u'term': u'bar'}])

    # Equal construction compares equal; a different facet field does not.
    self.assertEquals(build('tag'), build('tag'))
    self.assertNotEquals(build('tag'), build('bag'))
def test_nested_filter(self): q = FilteredQuery( MatchAllQuery(), NestedFilter( 'shares', BoolQuery(must=[ PrefixQuery('shares.orgid', 'abc'), PrefixQuery('shares.role', '11') ]))) resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type]) self.assertEquals(resultset.total, 3) print ', '.join([r['body'] for r in resultset]) q = FilteredQuery( MatchAllQuery(), NestedFilter( 'shares', BoolQuery(must=[ PrefixQuery('shares.orgid', 'abc.de'), PrefixQuery('shares.role', '111') ]))) resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type]) self.assertEquals(resultset.total, 1) print ', '.join([r['body'] for r in resultset]) q = FilteredQuery( MatchAllQuery(), NestedFilter( 'shares', BoolQuery(must=[ PrefixQuery('shares.orgid', 'abc.de.1'), PrefixQuery('shares.role', '11') ]))) resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type]) self.assertEquals(resultset.total, 0) print ', '.join([r['body'] for r in resultset]) q = FilteredQuery( MatchAllQuery(), NestedFilter( 'shares', BoolQuery(must=[ PrefixQuery('shares.orgid', 'abc'), PrefixQuery('shares.role', '111') ]))) resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type]) self.assertEquals(resultset.total, 2) print ', '.join([r['body'] for r in resultset]) print
def test_iterator(self):
    """Result sets are iterable, sliceable, indexable and expose ``total``."""
    page20 = self.conn.search(Search(MatchAllQuery(), size=20),
                              self.index_name, self.document_type)
    self.assertEqual(len(list(page20)), 20)

    page10 = self.conn.search(Search(MatchAllQuery(), size=10),
                              self.index_name, self.document_type)
    self.assertEqual(len(list(page10[:10])), 10)
    # Indexing past the page boundary transparently fetches more hits.
    self.assertEqual(page10[10].uuid, "11111")
    # total reports all matches, not the requested page size.
    self.assertEqual(page10.total, 1000)
def test_GeoDistanceFilter(self):
    """GeoDistanceFilter accepts both a dict and a [lon, lat] list location."""
    for location in ({"lat": 40, "lon": 70}, [70, 40]):
        geo_filter = GeoDistanceFilter("pin.location", location, "200km")
        q = FilteredQuery(MatchAllQuery(), geo_filter)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(resultset.total, 1)
def test_GeoBoundingBoxFilter(self):
    """Bounding-box filter with dict corners and with [lon, lat] list corners."""
    dict_box = GeoBoundingBoxFilter(
        "pin.location",
        location_tl={"lat": 40.717, "lon": 70.99},
        location_br={"lat": 40.03, "lon": 72.0})
    resultset = self.conn.search(
        query=FilteredQuery(MatchAllQuery(), dict_box),
        indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)

    list_box = GeoBoundingBoxFilter("pin.location",
                                    [70.99, 40.717], [74.1, 40.03])
    result2 = self.conn.search(
        query=FilteredQuery(MatchAllQuery(), list_box),
        indices=["test-mindex"])
    self.assertEquals(result2.total, 1)
def get_query(s):
    """Parse a simple ``field:value`` query string into a pyes query.

    Each ``field:value`` pair becomes a TermQuery (or a WildcardQuery when
    the value ends with ``*``); quotes around values are stripped.  Any
    leftover free text becomes a TextQuery against ``_all`` (or a
    MatchAllQuery when the free text is exactly ``*``).  A single clause
    is returned directly; multiple clauses are AND-ed into a BoolQuery.
    """
    import re
    clauses = []
    leftover = ""
    pos = 0
    for match in re.finditer(r'(?P<name>\S+):(?P<value>"[^"]+"|\S+)\s*', s):
        # Text between matches is free text, not a field:value pair.
        leftover += s[pos:match.start()]
        pos = match.end()
        parts = match.groupdict()
        value = parts['value'].strip('"')
        if value.endswith("*"):
            clauses.append(WildcardQuery(field=parts['name'], value=value))
        else:
            clauses.append(TermQuery(field=parts['name'], value=value))
    leftover += s[pos:]
    leftover = leftover.strip()
    if leftover:
        if leftover == '*':
            clauses.append(MatchAllQuery())
        else:
            clauses.append(TextQuery("_all", leftover, operator='and'))
    if len(clauses) == 1:
        return clauses[0]
    combined = BoolQuery()
    for clause in clauses:
        combined.add_must(clause)
    return combined
def create_query(self, sql):
    """Build a FilteredQuery from a parsed ``sql`` spec dict.

    Recognised keys (all others are ignored):
      - ``termquery``: list of one-entry dicts mapping field -> value,
        each becoming a TermFilter;
      - ``geodistancefilter``: dict with ``field``, ``fieldvalue`` and
        ``distance`` keys, becoming a GeoDistanceFilter (arc, km);
      - ``optype``: ``'or'`` combines filters with ORFilter, anything
        else (including ``'and'``) with ANDFilter.
    """
    if "termquery" in sql:
        for term in sql['termquery']:
            # Each entry is a single-key dict.  list() keeps this working
            # on Python 3, where dict.keys() is a non-indexable view
            # (the original ``term.keys()[0]`` is Python-2-only).
            field = list(term.keys())[0]
            self._query_field.append(field)
            self._filters.append(TermFilter(field, term[field]))
    if "geodistancefilter" in sql:
        geoterm = sql['geodistancefilter']
        geofieldname = geoterm['field']
        self._filters.append(
            GeoDistanceFilter(geofieldname, geoterm['fieldvalue'],
                              geoterm['distance'], 'arc', 'km'))
        self._query_field.append(geofieldname)
    if "optype" in sql:
        # Single decision instead of the original's redundant double
        # assignment for 'and' (same observable behavior).
        if sql['optype'].lower() == 'or':
            self._filtertype = ORFilter(self._filters)
        else:
            self._filtertype = ANDFilter(self._filters)
    return FilteredQuery(MatchAllQuery(), self._filtertype)
def test_iterator_offset(self):
    """Paging with start=10/size=10 yields records 10-19 and the full total."""
    search = Search(MatchAllQuery(), start=10, size=10,
                    sort={'position': {'order': 'asc'}})
    resultset = self.conn.search(search, self.index_name, self.document_type,
                                 start=10, size=10)
    # len() reflects the total number of matches, not the page size.
    self.assertEqual(len(resultset), 1000)
    # The page must hold positions 10..19 in order, not 0..9.
    for offset, record in enumerate(resultset):
        self.assertEqual(record.position, offset + 10)
def test_GeoPolygonFilter(self):
    """Polygon filter accepts dict points and [lon, lat] list points."""
    point_sets = [
        [{"lat": 50, "lon": -30},
         {"lat": 30, "lon": -80},
         {"lat": 80, "lon": -90}],
        [[-30, 50], [-80, 30], [-90, 80]],
    ]
    for points in point_sets:
        q = FilteredQuery(MatchAllQuery(),
                          GeoPolygonFilter("pin.location", points))
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEquals(resultset.total, 1)
def test_nested_agg(self):
    """A nested aggregation on ``resellers`` counts the nested documents."""
    search = MatchAllQuery().search()
    search.agg.add(NestedAgg(name='nested', path='resellers'))
    resultset = self.conn.search(query=search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    # 3 root documents contain 4 nested reseller documents in total.
    self.assertEqual(resultset.aggs.nested, {u'doc_count': 4})
def test_sorting_by_geolocation(self):
    """Sorting by geo distance from (1, 1) orders documents 1, 2, 3."""
    search = Search(MatchAllQuery())
    order = GeoSortOrder(field='location', lat=1, lon=1)
    search.sort.add(order)
    resultset = self.conn.search(search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual([hit['_id'] for hit in resultset.hits],
                     ['1', '2', '3'])
def test_sorting_by_script(self):
    """A numeric script sort on 1.0/foo reverses the natural foo order."""
    search = Search(MatchAllQuery())
    search.sort.add(ScriptSortOrder("1.0/doc['foo'].value", type='number'))
    resultset = self.conn.search(search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual([hit['_id'] for hit in resultset.hits],
                     ['3', '2', '1'])
def test_sorting_by_foo(self):
    """Sorting on ``foo`` descending yields ids 3, 2, 1."""
    search = Search(MatchAllQuery())
    search.sort.add(SortOrder('foo', order='desc'))
    resultset = self.conn.search(search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual([hit['_id'] for hit in resultset.hits],
                     ['3', '2', '1'])
def test_facet_filter_is_serialized_correctly(self):
    """The facet_filter argument must survive query serialization."""
    query = MatchAllQuery().search(size=0)
    unreviewed = BoolFilter(must_not=TermQuery(field='reviewed', value=True))
    query.facet.add(TermFacet(field='topic', facet_filter=unreviewed))
    serialized = query.serialize()
    # The bool filter must appear under the facet's facet_filter key.
    self.assertTrue(serialized['facets']['topic']['facet_filter']['bool'])
def test_missing_agg(self):
    """A missing aggregation counts docs lacking the ``integer`` field."""
    search = MatchAllQuery().search()
    search.agg.add(MissingAgg(name='missing', field='integer'))
    resultset = self.conn.search(query=search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    # Exactly one of the three documents has no ``integer`` field.
    self.assertEqual(resultset.aggs.missing, {u'doc_count': 1})
def records(self):
    """Return an iterator over all documents in the stream as records.

    Raises:
        RuntimeError: if the stream has not been initialized.
    """
    if not self.connection:
        raise RuntimeError("Stream is not initialized")
    from pyes.query import MatchAllQuery
    # Scan search streams every document in batches of 200.
    scan_results = self.connection.search(MatchAllQuery(),
                                          search_type="scan",
                                          timeout="5m",
                                          size="200")
    return ESRecordIterator(scan_results, self.expand)
def test_max_agg(self):
    """A max aggregation over ``position`` reports the largest value."""
    search = MatchAllQuery().search()
    search.agg.add(MaxAgg(name='max', field='position'))
    resultset = self.conn.search(query=search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.aggs.max, {u'value': 3})
def dump_docs(fp, conn, index_name, doc_type, scroll='5m', encoding='utf8'):
    """Dump every document of ``doc_type`` in ``index_name`` to ``fp``.

    Documents are streamed with a scan/scroll search and written as one
    JSON object per line.
    """
    all_docs = conn.search(MatchAllQuery(), indices=[index_name],
                           doc_types=[doc_type], scan=True, scroll=scroll)
    for doc in all_docs:
        # NOTE(review): the ``encoding`` kwarg to json.dumps exists only on
        # Python 2; it was removed in Python 3 — confirm target version.
        fp.write(json.dumps(doc, encoding=encoding))
        fp.write('\n')
def test_nested_filter(self):
    """Nested fields need NestedFilter: flat term filters cannot reach them."""
    # (flat filter, expected hit count) — _all still matches, the
    # dotted nested path does not.
    flat_cases = [
        (TermFilter('_all', 'n_value1_1'), 2),
        (TermFilter('nested1.n_field1', 'n_value1_1'), 0),
        (TermFilter('nested1.n_field1', 'n_value1_1'), 0),
    ]
    for flat_filter, expected in flat_cases:
        q = FilteredQuery(MatchAllQuery(), flat_filter)
        resultset = self.conn.search(query=q, indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, expected)

    # (must clauses inside the nested filter, expected hit count)
    nested_cases = [
        ([TermQuery('nested1.n_field1', 'n_value1_1')], 2),
        ([TermQuery('nested1.n_field1', 'n_value1_1'),
          TermQuery('nested1.n_field2', 'n_value2_1')], 1),
    ]
    for must_clauses, expected in nested_cases:
        q = FilteredQuery(
            MatchAllQuery(),
            NestedFilter('nested1', BoolQuery(must=must_clauses)))
        resultset = self.conn.search(query=q, indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, expected)
def rows(self):
    """Return an iterator over all documents as rows of ``field_names``.

    Raises:
        RuntimeError: if the stream has not been initialized.
    """
    if not self.connection:
        raise RuntimeError("Stream is not initialized")
    from pyes.query import MatchAllQuery
    fields = self.field_names
    # Scan search streams every document in batches of 200.
    scan_results = self.connection.search(MatchAllQuery(),
                                          search_type="scan",
                                          timeout="5m",
                                          size="200")
    return ESRowIterator(scan_results, fields)
def test_reverse_nested_agg(self):
    """A reverse_nested sub-aggregation jumps back to parent documents."""
    search = MatchAllQuery().search()
    reverse_nested = ReverseNestedAgg(name='reverse', field='id')
    search.agg.add(NestedAgg(name='nested', path='resellers',
                             sub_aggs=[reverse_nested]))
    resultset = self.conn.search(query=search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    # 4 nested reseller docs overall...
    self.assertEqual(resultset.aggs.nested['doc_count'], 4)
    # ...belonging to 2 distinct parent documents.
    self.assertEqual(resultset.aggs.nested.reverse, {u'doc_count': 2})
def test_date_facet_filter(self):
    """Date histogram facet restricted by a range filter to April 2011."""
    april = ESRange('date',
                    datetime.date(2011, 4, 1),
                    datetime.date(2011, 5, 1),
                    include_upper=False)
    search = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april)).search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=search, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 2)
    # Single monthly bucket: 2011-04-01 in ms since the epoch.
    self.assertEquals(resultset.facets['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000}])
def test_terms_facet_filter(self):
    """Term facet with a term filter, checked through the legacy dict API."""
    search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
    search.facet.add_term_facet('tag')
    result = self.conn.search(query=search, indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEquals(result['hits']['total'], 2)
    self.assertEquals(result['facets']['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
def add_filters(filters):
    """Build a query from ``(field, term)`` pairs.

    Each pair becomes a TermFilter; a non-empty filter list is AND-ed
    together and wrapped around a match-all query.  With no filters a
    plain MatchAllQuery is returned.
    """
    flist = [TermFilter(field, term) for field, term in filters]
    if flist:
        # BUG FIX: the original did ``q = FilteredQuery(q, f)`` with ``q``
        # never assigned, raising UnboundLocalError whenever any filter
        # was given; the filter must wrap a fresh MatchAllQuery.
        q = FilteredQuery(MatchAllQuery(), ANDFilter(flist))
    else:
        q = MatchAllQuery()
    return q
def test_date_facet(self):
    """Monthly date histogram facet over all three documents."""
    q = MatchAllQuery().search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=q, indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEquals(resultset.total, 3)
    self.assertEquals(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
    # Sanity-check the bucket timestamps (ms since epoch) against the
    # expected month starts.  FIX: the original used the leading-zero
    # literals ``04`` and ``01`` — octal on Python 2 (same value only by
    # luck) and a SyntaxError on Python 3; plain decimals are equivalent.
    self.assertEquals(
        datetime.datetime.fromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.fromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram facet, checked through the legacy dict API."""
    q = MatchAllQuery().search()
    q.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    result = self.conn.search(query=q, indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEquals(result['hits']['total'], 3)
    self.assertEquals(result['facets']['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
    # Sanity-check the bucket timestamps (ms since epoch) against the
    # expected month starts.  FIX: the original used the leading-zero
    # literals ``04`` and ``01`` — octal on Python 2 (same value only by
    # luck) and a SyntaxError on Python 3; plain decimals are equivalent.
    self.assertEquals(
        datetime.datetime.fromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEquals(
        datetime.datetime.fromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
# Build and print (as JSON) a two-level terms-aggregation query:
# one bucket per client, each broken down into per-domain sub-buckets.
import json
from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Match every document; the aggregations do the real work.
match_all = MatchAllQuery()
# Inner bucket: one per domain id.  size=20000 raises the bucket limit
# so buckets are not silently dropped — TODO confirm this covers the
# actual cardinality of etp_domain_id.
sub_domain_agg = TermsAgg('domain_agg', field='json_data.etp_domain_id',
                          size=20000)
# Outer bucket: one per client id, each carrying the domain sub-aggregation.
client_agg = TermsAgg('client_agg', field='json_data.etp_client_id',
                      sub_aggs=[sub_domain_agg], size=20000)
# size=0: only aggregation results are wanted, no hits.
search_query = Search(query=match_all, size=0)
search_query.agg.add(client_agg)
print(json.dumps(search_query.serialize(), indent=2))