Beispiel #1
0
    def test_terms_facet_filter(self):
        """Check term facets on a filtered search and search equality.

        Runs a match-all query filtered on ``tag == 'foo'`` with a term facet
        on ``tag``, checks the hit total and the facet buckets through both
        item and attribute access, then verifies that identically built
        searches compare equal and that a different facet field makes them
        compare unequal.
        """

        def build_search(facet_field):
            # Every search in this test is the same filtered match-all query;
            # only the faceted field varies.
            search = FilteredQuery(MatchAllQuery(),
                                   TermFilter('tag', 'foo')).search()
            search.facet.add_term_facet(facet_field)
            return search

        expected_terms = [{u'count': 2, u'term': u'foo'}]
        resultset = self.conn.search(query=build_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 2)
        # The facet data is read once by key and once by attribute.
        self.assertEqual(resultset.facets['tag']['terms'], expected_terms)
        self.assertEqual(resultset.facets.tag.terms, expected_terms)

        tag_search = build_search('tag')
        self.assertEqual(build_search('tag'), tag_search)
        self.assertNotEqual(tag_search, build_search('bag'))
Beispiel #2
0
    def test_terms_facet(self):
        """Check a term facet on ``tag`` for an unfiltered match-all search.

        Also verifies that searches built the same way compare equal and that
        a different facet field makes them compare unequal.
        """

        def build_search(facet_field):
            # Match-all search carrying a single term facet on ``facet_field``.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=build_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms, [{
            u'count': 2,
            u'term': u'foo'
        }, {
            u'count': 1,
            u'term': u'bar'
        }])

        tag_search = build_search('tag')
        self.assertEqual(tag_search, build_search('tag'))
        self.assertNotEqual(tag_search, build_search('bag'))
Beispiel #3
0
    def test_nested_filter(self):
        """Check hit counts of nested prefix filters on the ``shares`` path.

        Each case wraps a ``BoolQuery`` with two ``PrefixQuery`` clauses
        (``shares.orgid`` and ``shares.role``) in a ``NestedFilter`` on
        ``shares``, asserts the hit total and prints the ``body`` of every
        returned document.
        """
        # (orgid prefix, role prefix, expected hit total)
        cases = [
            ('abc', '11', 3),
            ('abc.de', '111', 1),
            ('abc.de.1', '11', 0),
            ('abc', '111', 2),
        ]
        for orgid_prefix, role_prefix, expected_total in cases:
            q = FilteredQuery(
                MatchAllQuery(),
                NestedFilter(
                    'shares',
                    BoolQuery(must=[
                        PrefixQuery('shares.orgid', orgid_prefix),
                        PrefixQuery('shares.role', role_prefix)
                    ])))
            resultset = self.conn.search(query=q,
                                         indices=self.index_name,
                                         doc_types=[self.document_type])
            self.assertEqual(resultset.total, expected_total)
            # A single parenthesised argument prints identically whether
            # ``print`` is a statement (Python 2) or a function (Python 3).
            print(', '.join([r['body'] for r in resultset]))

        # Trailing blank line after the per-case output.
        print('')
Beispiel #4
0
 def test_iterator(self):
     """Check iteration, slicing, indexing and ``total`` on search results."""
     twenty = self.conn.search(Search(MatchAllQuery(), size=20),
                               self.index_name, self.document_type)
     # Count by plain iteration over the result set.
     self.assertEqual(sum(1 for _ in twenty), 20)

     ten = self.conn.search(Search(MatchAllQuery(), size=10),
                            self.index_name, self.document_type)
     self.assertEqual(sum(1 for _ in ten[:10]), 10)
     self.assertEqual(ten[10].uuid, "11111")
     self.assertEqual(ten.total, 1000)
Beispiel #5
0
    def test_GeoDistanceFilter(self):
        """Check a 200km geo-distance filter with two point notations."""
        # The same search runs with the centre point given first as a lat/lon
        # mapping, then as a two-element list.
        for location in ({"lat": 40, "lon": 70}, [70, 40]):
            distance_filter = GeoDistanceFilter("pin.location", location,
                                                "200km")
            filtered = FilteredQuery(MatchAllQuery(), distance_filter)
            found = self.conn.search(query=filtered, indices=["test-mindex"])
            self.assertEqual(found.total, 1)
Beispiel #6
0
    def test_GeoBoundingBoxFilter(self):
        """Check the geo bounding-box filter with mapping and list corners."""
        # Corners passed by keyword as lat/lon mappings.
        gq = GeoBoundingBoxFilter("pin.location",
                                  location_tl={"lat": 40.717, "lon": 70.99},
                                  location_br={"lat": 40.03, "lon": 72.0})
        q = FilteredQuery(MatchAllQuery(), gq)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(resultset.total, 1)

        # Corners passed positionally as two-element lists.
        gq = GeoBoundingBoxFilter("pin.location", [70.99, 40.717],
                                  [74.1, 40.03])
        q = FilteredQuery(MatchAllQuery(), gq)
        result2 = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(result2.total, 1)
Beispiel #7
0
def get_query(s):
    """Translate a search string into a query object.

    Every ``name:value`` token (the value may be double-quoted) becomes a
    ``WildcardQuery`` when the value ends with ``*`` and a ``TermQuery``
    otherwise.  The text left over between those tokens is concatenated and
    stripped: a lone ``*`` adds a ``MatchAllQuery``, any other non-empty
    remainder adds an and-operator ``TextQuery`` on ``_all``.

    Returns the single resulting query when there is exactly one, otherwise
    a ``BoolQuery`` with every query added as a ``must`` clause.
    """
    import re

    field_pattern = r'(?P<name>\S+):(?P<value>"[^"]+"|\S+)\s*'
    clauses = []
    leftovers = []
    cursor = 0
    for match in re.finditer(field_pattern, s):
        # Keep the text between the previous token and this one.
        leftovers.append(s[cursor:match.start()])
        cursor = match.end()
        name = match.group('name')
        value = match.group('value').strip('"')
        query_cls = WildcardQuery if value.endswith("*") else TermQuery
        clauses.append(query_cls(field=name, value=value))
    leftovers.append(s[cursor:])

    freetext = "".join(leftovers).strip()
    if freetext == '*':
        clauses.append(MatchAllQuery())
    elif freetext:
        clauses.append(TextQuery("_all", freetext, operator='and'))

    if len(clauses) == 1:
        return clauses[0]

    combined = BoolQuery()
    for clause in clauses:
        combined.add_must(clause)
    return combined
Beispiel #8
0
 def create_query(self, sql):
     """Build a filtered match-all query from a query description mapping.

     Recognised keys of ``sql`` (all others are ignored):

     * ``termquery``: iterable of mappings; the first key/value pair of
       each one becomes a ``TermFilter``.
     * ``geodistancefilter``: mapping with ``field``, ``fieldvalue`` and
       ``distance``, turned into a ``GeoDistanceFilter``.
     * ``optype``: ``'and'`` or ``'or'`` (case-insensitive), selecting
       ``ANDFilter`` or ``ORFilter``; ``ANDFilter`` is used when the key is
       absent.

     Created filters are appended to ``self._filters`` and their field
     names to ``self._query_field``.

     Returns a ``FilteredQuery`` over ``MatchAllQuery`` that uses
     ``self._filtertype``.
     """
     if "termquery" in sql:
         for term in sql['termquery']:
             # list(term)[0] yields the first key on Python 2 and 3 alike;
             # indexing dict.keys() only works on Python 2.
             field = list(term)[0]
             self._query_field.append(field)
             self._filters.append(TermFilter(field, term[field]))
     if "geodistancefilter" in sql:
         geoterm = sql['geodistancefilter']
         geofieldname = geoterm['field']
         _gf = GeoDistanceFilter(geofieldname,
                                 geoterm['fieldvalue'],
                                 geoterm['distance'],
                                 'arc', 'km')
         self._filters.append(_gf)
         self._query_field.append(geofieldname)
     if "optype" in sql:
         optype = sql['optype'].lower()
         if optype == 'and':
             self._filtertype = ANDFilter(self._filters)
         elif optype == 'or':
             self._filtertype = ORFilter(self._filters)
         # NOTE(review): any other optype leaves self._filtertype as it
         # was before this call -- confirm that this is intended.
     else:
         self._filtertype = ANDFilter(self._filters)
     return FilteredQuery(MatchAllQuery(), self._filtertype)
Beispiel #9
0
    def test_iterator_offset(self):
        """Check that a search started at offset 10 yields positions 10+."""
        # Ask for a block of 10 starting at position 10, sorted by position.
        paged_search = Search(MatchAllQuery(),
                              start=10,
                              size=10,
                              sort={'position': {'order': 'asc'}})
        resultset = self.conn.search(paged_search,
                                     self.index_name,
                                     self.document_type,
                                     start=10,
                                     size=10)

        # The result set must report 1000 results.
        self.assertEqual(len(resultset), 1000)

        # The iterated records must carry positions 10, 11, ... rather than
        # starting again from 0.
        for expected_position, record in enumerate(resultset, 10):
            self.assertEqual(record.position, expected_position)
Beispiel #10
0
    def test_GeoPolygonFilter(self):
        """Check the geo polygon filter with mapping and list vertices."""
        # Vertices given as lat/lon mappings.
        gq = GeoPolygonFilter("pin.location", [{"lat": 50, "lon": -30},
                                               {"lat": 30, "lon": -80},
                                               {"lat": 80, "lon": -90}])
        q = FilteredQuery(MatchAllQuery(), gq)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(resultset.total, 1)

        # Vertices given as two-element lists.
        gq = GeoPolygonFilter("pin.location", [[-30, 50],
                                               [-80, 30],
                                               [-90, 80]])
        q = FilteredQuery(MatchAllQuery(), gq)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(resultset.total, 1)
Beispiel #11
0
 def test_nested_agg(self):
     """Check the ``doc_count`` of a nested aggregation on ``resellers``."""
     search = MatchAllQuery().search()
     search.agg.add(NestedAgg(name='nested', path='resellers'))
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u'doc_count': 4})
Beispiel #12
0
 def test_sorting_by_geolocation(self):
     """Check the hit order when sorting with a geo sort on ``location``."""
     by_location = Search(MatchAllQuery())
     geo_order = GeoSortOrder(field='location', lat=1, lon=1)
     by_location.sort.add(geo_order)
     resultset = self.conn.search(by_location,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual([hit['_id'] for hit in resultset.hits],
                      ['1', '2', '3'])
Beispiel #13
0
 def test_sorting_by_script(self):
     """Check the hit order when sorting by a numeric script on ``foo``."""
     by_script = Search(MatchAllQuery())
     script_order = ScriptSortOrder("1.0/doc['foo'].value", type='number')
     by_script.sort.add(script_order)
     resultset = self.conn.search(by_script,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual([hit['_id'] for hit in resultset.hits],
                      ['3', '2', '1'])
Beispiel #14
0
 def test_sorting_by_foo(self):
     """Check the hit order when sorting on ``foo`` in descending order."""
     by_foo = Search(MatchAllQuery())
     descending_foo = SortOrder('foo', order='desc')
     by_foo.sort.add(descending_foo)
     resultset = self.conn.search(by_foo,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual([hit['_id'] for hit in resultset.hits],
                      ['3', '2', '1'])
Beispiel #15
0
 def test_facet_filter_is_serialized_correctly(self):
     """Check that a facet's ``facet_filter`` serializes with a ``bool`` key."""
     search = MatchAllQuery().search(size=0)
     not_reviewed = BoolFilter(
         must_not=TermQuery(field='reviewed', value=True))
     search.facet.add(TermFacet(field='topic', facet_filter=not_reviewed))

     facet_filter = search.serialize()['facets']['topic']['facet_filter']
     self.assertTrue(facet_filter['bool'])
Beispiel #16
0
    def test_missing_agg(self):
        """Check the ``doc_count`` of a missing aggregation on ``integer``."""
        search = MatchAllQuery().search()
        search.agg.add(MissingAgg(name='missing', field='integer'))
        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u'doc_count': 1})
Beispiel #17
0
 def records(self):
     """Return an ``ESRecordIterator`` over a match-all scan search.

     Raises:
         RuntimeError: if ``self.connection`` is not set.
     """
     if self.connection:
         from pyes.query import MatchAllQuery
         scan_options = dict(search_type="scan", timeout="5m", size="200")
         hits = self.connection.search(MatchAllQuery(), **scan_options)
         return ESRecordIterator(hits, self.expand)
     raise RuntimeError("Stream is not initialized")
Beispiel #18
0
    def test_max_agg(self):
        """Check the value reported by a max aggregation on ``position``."""
        search = MatchAllQuery().search()
        search.agg.add(MaxAgg(name='max', field='position'))
        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u'value': 3})
Beispiel #19
0
def dump_docs(fp, conn, index_name, doc_type, scroll='5m', encoding='utf8'):
    """Write every search result of one index/doc type to ``fp`` as JSON lines.

    Runs a match-all search on ``conn`` with ``scan=True`` and the given
    ``scroll`` value, then writes each result to ``fp`` as one ``json.dumps``
    string followed by a newline.
    """
    hits = conn.search(MatchAllQuery(),
                       indices=[index_name],
                       doc_types=[doc_type],
                       scan=True,
                       scroll=scroll)
    for hit in hits:
        # NOTE(review): ``json.dumps`` accepts ``encoding`` on Python 2 only.
        serialized = json.dumps(hit, encoding=encoding)
        fp.write(serialized)
        fp.write('\n')
Beispiel #20
0
    def test_nested_filter(self):
        """Compare plain term filters with nested filters on ``nested1``."""

        def total_for(es_filter):
            # Hit total of a match-all query restricted by ``es_filter``.
            filtered = FilteredQuery(MatchAllQuery(), es_filter)
            found = self.conn.search(query=filtered,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
            return found.total

        self.assertEqual(total_for(TermFilter('_all', 'n_value1_1')), 2)

        # The direct term filter on the nested field is searched twice in a
        # row, each time expecting no hits.
        for _ in range(2):
            self.assertEqual(
                total_for(TermFilter('nested1.n_field1', 'n_value1_1')), 0)

        one_term = NestedFilter(
            'nested1',
            BoolQuery(must=[TermQuery('nested1.n_field1', 'n_value1_1')]))
        self.assertEqual(total_for(one_term), 2)

        two_terms = NestedFilter(
            'nested1',
            BoolQuery(must=[
                TermQuery('nested1.n_field1', 'n_value1_1'),
                TermQuery('nested1.n_field2', 'n_value2_1')
            ]))
        self.assertEqual(total_for(two_terms), 1)
0
 def rows(self):
     """Return an ``ESRowIterator`` over a match-all scan search.

     The iterator receives the search results and ``self.field_names``.

     Raises:
         RuntimeError: if ``self.connection`` is not set.
     """
     if self.connection:
         from pyes.query import MatchAllQuery
         field_names = self.field_names
         scan_options = dict(search_type="scan", timeout="5m", size="200")
         hits = self.connection.search(MatchAllQuery(), **scan_options)
         return ESRowIterator(hits, field_names)
     raise RuntimeError("Stream is not initialized")
Beispiel #22
0
    def test_reverse_nested_agg(self):
        """Check a reverse-nested aggregation placed under a nested one."""
        search = MatchAllQuery().search()
        back_to_parent = ReverseNestedAgg(name='reverse', field='id')
        search.agg.add(
            NestedAgg(name='nested',
                      path='resellers',
                      sub_aggs=[back_to_parent]))
        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)

        nested_result = resultset.aggs.nested
        self.assertEqual(nested_result['doc_count'], 4)
        self.assertEqual(nested_result.reverse, {u'doc_count': 2})
Beispiel #23
0
 def test_date_facet_filter(self):
     """Check a monthly date histogram facet on a date-range-filtered search.

     The search is a match-all query filtered to ``date`` values from
     2011-04-01 to 2011-05-01 with ``include_upper=False``.
     """
     date_range = ESRange('date',
                          datetime.date(2011, 4, 1),
                          datetime.date(2011, 5, 1),
                          include_upper=False)
     q = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=date_range))
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
                                              field='date',
                                              interval='month'))
     resultset = self.conn.search(query=q,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000}])
Beispiel #24
0
 def test_terms_facet_filter(self):
     """Check a term facet on ``tag`` for a search filtered on ``tag == 'foo'``.

     The search result is read here as a plain mapping (``result['hits']``,
     ``result['facets']``).
     """
     q = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     q.facet.add_term_facet('tag')
     result = self.conn.search(query=q,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     self.assertEqual(result['hits']['total'], 2)
     self.assertEqual(result['facets']['tag']['terms'], [{
         u'count': 2,
         u'term': u'foo'
     }])
Beispiel #25
0
def add_filters(filters):
    """Build a query that applies every ``(field, term)`` pair as a term filter.

    Args:
        filters: iterable of ``(field, term)`` pairs.

    Returns:
        A ``FilteredQuery`` over ``MatchAllQuery`` with an ``ANDFilter`` of
        one ``TermFilter`` per pair, or a bare ``MatchAllQuery`` when
        ``filters`` is empty.
    """
    flist = [TermFilter(field, term) for field, term in filters]
    if not flist:
        return MatchAllQuery()
    # There is no incoming query to refine, so the filters are applied on
    # top of a match-all base query.
    return FilteredQuery(MatchAllQuery(), ANDFilter(flist))
Beispiel #26
0
 def test_date_facet(self):
     """Check a monthly date histogram facet on a match-all search.

     Also checks that the two expected bucket timestamps (epoch
     milliseconds) correspond to 2011-04-01 and 2011-05-01 in UTC.
     """
     epoch = datetime.datetime(1970, 1, 1)

     def utc_date(millis):
         # Convert without going through local time, so the result does not
         # depend on the time zone of the machine running the test.
         return (epoch + datetime.timedelta(milliseconds=millis)).date()

     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
                                              field='date',
                                              interval='month'))
     resultset = self.conn.search(query=q,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
     self.assertEqual(utc_date(1301616000000), datetime.date(2011, 4, 1))
     self.assertEqual(utc_date(1304208000000), datetime.date(2011, 5, 1))
Beispiel #27
0
 def test_date_facet(self):
     """Check a monthly date histogram facet on a match-all search.

     The search result is read here as a plain mapping.  Also checks that
     the two expected bucket timestamps (epoch milliseconds) correspond to
     2011-04-01 and 2011-05-01 in UTC.
     """
     epoch = datetime.datetime(1970, 1, 1)

     def utc_date(millis):
         # Convert without going through local time, so the result does not
         # depend on the time zone of the machine running the test.
         return (epoch + datetime.timedelta(milliseconds=millis)).date()

     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     result = self.conn.search(query=q,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'],
                      [{
                          u'count': 2,
                          u'time': 1301616000000
                      }, {
                          u'count': 1,
                          u'time': 1304208000000
                      }])
     self.assertEqual(utc_date(1301616000000), datetime.date(2011, 4, 1))
     self.assertEqual(utc_date(1304208000000), datetime.date(2011, 5, 1))
import json
from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Build a size=0 search with a two-level terms aggregation and print its
# serialized form as indented JSON.
match_all = MatchAllQuery()

# Terms aggregation on the domain id, attached below as a sub-aggregation.
sub_domain_agg = TermsAgg(
    'domain_agg',
    field='json_data.etp_domain_id',
    size=20000,
)
# Terms aggregation on the client id that carries the domain aggregation.
client_agg = TermsAgg(
    'client_agg',
    field='json_data.etp_client_id',
    sub_aggs=[sub_domain_agg],
    size=20000,
)

search_query = Search(query=match_all, size=0)
search_query.agg.add(client_agg)

serialized_query = search_query.serialize()
print(json.dumps(serialized_query, indent=2))