コード例 #1
0
ファイル: bible_facet.py プロジェクト: gregors/habakkuk-alpha
def term_facet(host='localhost:9200',
               terms=['bibleverse'],
               _type='habakkuk',
               date_filter=[],
               size=10):
    ret = []
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start,end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False)))

    q = q.search(size=0)
    for term in terms:
        q.facet.add_term_facet(term,order='count',size=size)
        
    print json.dumps(json.loads(q.to_search_json()),indent=2)

    resultset = conn.search(query=q, indices=_type+'-*', doc_types=[_type])
    for facet in resultset.facets:
        print "Total",facet,resultset.facets[facet]['total']
        for row in resultset.facets[facet]['terms']:
            print "\t",row['term'],row['count']
            ret.append((facet,row['term']))
        
    return ret
コード例 #2
0
ファイル: bible_facet.py プロジェクト: telvis07/habakkuk_web
def facets(host='localhost:9200',
          facet_terms=['bibleverse'],
          _type='habakkuk',
          date_filter=[],
          size=10):
    """Collect term-facet counts from the ``<_type>-*`` indices.

    host        -- address handed to ``ES``.
    facet_terms -- field names; one term facet (ordered by count) is added per field.
    _type       -- used both as the index prefix and as the doc type.
    date_filter -- empty for no filtering, otherwise a ``(start, end)`` pair whose
                   ``isoformat()`` values bound ``created_at_date`` (upper bound excluded).
    size        -- ``size`` passed to every term facet.

    Returns a dict mapping each facet name to a list of
    ``{"value": term, "count": count}`` dicts.
    """
    es_conn = ES(host)
    query = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        created_range = ESRange('created_at_date',
                                start.isoformat(),
                                end.isoformat(),
                                include_upper=False)
        query = FilteredQuery(query, RangeFilter(qrange=created_range))

    # size=0: only the facet data is requested, no hits.
    search = query.search(size=0)
    for field in facet_terms:
        search.facet.add_term_facet(field, order='count', size=size)

    es_logger.info(search.serialize())

    resultset = es_conn.search(query=search, indices=_type+'-*', doc_types=[_type])
    counts = {}
    for facet in resultset.facets:
        counts[facet] = [{"value": row['term'], "count": row['count']}
                         for row in resultset.facets[facet]['terms']]

    logger.debug("facets return|'%s'"%json.dumps(counts))
    return counts
コード例 #3
0
def dump(start,end,backupdir,eshost):
    """Back up 'habakkuk' documents from a date range to a gzipped JSON-lines file.

    start, end -- bounds applied to ``created_at_date`` (upper bound excluded);
                  ``start`` is also formatted as YYYY-MM-DD for the backup name.
    backupdir  -- directory that receives ``habakkuk-<start>.json.gz``.
    eshost     -- address handed to ``ES``.

    Documents are first written one JSON object per line to /tmp/out.json and
    that file is then compressed into the backup.  Returns None; progress is
    reported on stderr.  When the range holds no documents nothing is written.
    """
    _type = 'habakkuk'
    tmp_path = '/tmp/out.json'

    conn = ES(eshost)
    q = MatchAllQuery()
    q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False)))
    q = q.search()
    resultset = conn.search(query=q,indices=_type+"-*", doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data for %s - %s\n"%(start,end))
        return

    # The scratch file is opened only once there is data, so the early return
    # above can no longer leak an open handle.
    sys.stderr.write("Will write %d lines to %s\n"%(resultset.total, tmp_path))
    with open(tmp_path,'w') as out:
        for r in resultset:
            out.write(json.dumps(r)+'\n')

    # gzip
    ext = datetime.strftime(start,'%Y-%m-%d')
    backup = os.path.join(backupdir,"habakkuk-%s.json.gz"%ext)

    # Both handles are closed even if compression fails (the original closed
    # the gzip handle twice and never closed the input).
    with open(tmp_path,'rb') as f_in:
        f_out = gzip.open(backup,'wb')
        try:
            f_out.writelines(f_in)
        finally:
            f_out.close()
    sys.stderr.write("Created %s\n"%backup)
コード例 #4
0
ファイル: test_facets.py プロジェクト: Naeka/pyes
 def test_facet_filter_is_serialized_correctly(self):
     # A facet_filter given to a TermFacet must show up in the serialized
     # search under facets -> topic -> facet_filter -> bool.
     not_reviewed = BoolFilter(must_not=TermQuery(field="reviewed", value=True))
     topic_facet = TermFacet(field="topic", facet_filter=not_reviewed)
     search = MatchAllQuery().search(size=0)
     search.facet.add(topic_facet)
     body = search.serialize()
     self.assertTrue(body["facets"]["topic"]["facet_filter"]["bool"])
コード例 #5
0
    def test_nested_filter(self):
        # Every case filters all documents through a NestedFilter on 'shares'
        # whose BoolQuery requires both prefix queries.
        # Columns: shares.orgid prefix, shares.role prefix, expected total.
        cases = [
            ('abc', '11', 3),
            ('abc.de', '111', 1),
            ('abc.de.1', '11', 0),
            ('abc', '111', 2),
        ]
        for orgid_prefix, role_prefix, expected_total in cases:
            q = FilteredQuery(
                MatchAllQuery(),
                NestedFilter(
                    'shares',
                    BoolQuery(must=[
                        PrefixQuery('shares.orgid', orgid_prefix),
                        PrefixQuery('shares.role', role_prefix)
                    ])))
            resultset = self.conn.search(query=q,
                                         indices=self.index_name,
                                         doc_types=[self.document_type])
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(resultset.total, expected_total)
            # Single-argument print(...) emits the same text under Python 2 and 3.
            print(', '.join([r['body'] for r in resultset]))

        # Trailing blank line, as the original bare `print` produced.
        print('')
コード例 #6
0
ファイル: test_aggs.py プロジェクト: ttimasdf/pyes
 def test_nested_agg(self):
     # Nested aggregation on path 'resellers' over a match-all search:
     # 3 hits in total and a nested doc_count of 4.
     search = MatchAllQuery().search()
     search.agg.add(NestedAgg(name='nested', path='resellers'))
     resultset = self.conn.search(
         query=search,
         indices=self.index_name,
         doc_types=[self.document_type])
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u'doc_count': 4})
コード例 #7
0
ファイル: test_facets.py プロジェクト: EnTeQuAk/pyes
 def test_terms_facet(self):
     # Term facet on 'tag' over every document: 3 hits in total, with 'foo'
     # counted twice and 'bar' once.
     q = MatchAllQuery().search()
     q.facet.add_term_facet('tag')
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.tag.terms, [{u'count': 2, u'term': u'foo'},
                                                   {u'count': 1, u'term': u'bar'}])
コード例 #8
0
ファイル: facets.py プロジェクト: AndryulE/kitsune
 def test_terms_facet(self):
     # Term facet on 'tag' over every document in test-index/test-type.
     # The search result is read as a plain mapping here:
     # 3 hits in total, 'foo' counted twice and 'bar' once.
     q = MatchAllQuery().search()
     q.facet.add_term_facet('tag')
     result = self.conn.search(query=q, indexes=["test-index"], doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['tag']['terms'], [{u'count': 2, u'term': u'foo'},
                                                         {u'count': 1, u'term': u'bar'}])
コード例 #9
0
ファイル: test_facets.py プロジェクト: stevencdavis/pyes
 def test_facet_filter_is_serialized_correctly(self):
     # A facet_filter given to a TermFacet must show up in the serialized
     # search under facets -> topic -> facet_filter -> bool.
     not_reviewed = BoolFilter(must_not=TermQuery(field='reviewed', value=True))
     topic_facet = TermFacet(field='topic', facet_filter=not_reviewed)
     search = MatchAllQuery().search(size=0)
     search.facet.add(topic_facet)
     body = search.serialize()
     self.assertTrue(body['facets']['topic']['facet_filter']['bool'])
コード例 #10
0
ファイル: test_facets.py プロジェクト: akheron/pyes
 def test_terms_facet_filter(self):
     # Restrict the search to tag == 'foo' with a TermFilter, then facet on
     # 'tag': 2 hits and a single facet entry for 'foo'.
     q = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     q.facet.add_term_facet('tag')
     resultset = self.conn.search(query=q, indices=["test-index"], doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['tag']['terms'], [{u'count': 2, u'term': u'foo'}])
コード例 #11
0
ファイル: test_aggs.py プロジェクト: idon2020/pyes
 def test_nested_agg(self):
     # Nested aggregation on path "resellers" over a match-all search:
     # 3 hits in total and a nested doc_count of 4.
     search = MatchAllQuery().search()
     search.agg.add(NestedAgg(name="nested", path="resellers"))
     resultset = self.conn.search(
         query=search,
         indices=self.index_name,
         doc_types=[self.document_type])
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u"doc_count": 4})
コード例 #12
0
ファイル: test_aggs.py プロジェクト: ttimasdf/pyes
    def test_max_agg(self):
        # Max aggregation over 'position' on a match-all search:
        # 3 hits in total and a maximum value of 3.
        search = MatchAllQuery().search()
        search.agg.add(MaxAgg(name='max', field='position'))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u'value': 3})
コード例 #13
0
ファイル: test_aggs.py プロジェクト: ttimasdf/pyes
    def test_missing_agg(self):
        # Missing aggregation over 'integer' on a match-all search:
        # 3 hits in total, one of them counted by the aggregation.
        search = MatchAllQuery().search()
        search.agg.add(MissingAgg(name='missing', field='integer'))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u'doc_count': 1})
コード例 #14
0
ファイル: test_aggs.py プロジェクト: julianhille/pyes
    def test_min_agg(self):
        # Min aggregation over "position" on a match-all search:
        # 3 hits in total and a minimum value of 1.
        search = MatchAllQuery().search()
        min_agg = MinAgg(name="min", field="position")
        search.agg.add(min_agg)
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.min, {u'value': 1})
コード例 #15
0
ファイル: test_aggs.py プロジェクト: idon2020/pyes
    def test_missing_agg(self):
        # Missing aggregation over "integer" on a match-all search:
        # 3 hits in total, one of them counted by the aggregation.
        search = MatchAllQuery().search()
        search.agg.add(MissingAgg(name="missing", field="integer"))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u"doc_count": 1})
コード例 #16
0
 def test_iterator(self):
     # Iterating a size=20 search yields 20 items.
     twenty = Search(MatchAllQuery(), size=20)
     resultset = self.conn.search(twenty, self.index_name, self.document_type)
     iterated = [hit for hit in resultset]
     self.assertEqual(len(iterated), 20)

     # With size=10: slicing the first ten yields ten items, index 10 is
     # still addressable, and the total reported is 1000.
     ten = Search(MatchAllQuery(), size=10)
     resultset = self.conn.search(ten, self.index_name, self.document_type)
     first_ten = [hit for hit in resultset[:10]]
     self.assertEqual(len(first_ten), 10)
     self.assertEqual(resultset[10].uuid, "11111")
     self.assertEqual(resultset.total, 1000)
コード例 #17
0
ファイル: test_aggs.py プロジェクト: idon2020/pyes
    def test_max_agg(self):
        # Max aggregation over "position" on a match-all search:
        # 3 hits in total and a maximum value of 3.
        search = MatchAllQuery().search()
        search.agg.add(MaxAgg(name="max", field="position"))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u"value": 3})
コード例 #18
0
ファイル: test_geoloc.py プロジェクト: trb116/pythonanalyzer
    def test_GeoDistanceFilter(self):
        # The same 200km distance filter on pin.location is built from two
        # spellings of the centre point -- a lat/lon dict and a two-element
        # list (presumably [lon, lat]; confirm against pyes) -- and each
        # must match exactly one document.
        centres = ({"lat": 40, "lon": 70}, [70, 40])
        for centre in centres:
            geo_filter = GeoDistanceFilter("pin.location", centre, "200km")
            query = FilteredQuery(MatchAllQuery(), geo_filter)
            resultset = self.conn.search(query=query, indices=["test-mindex"])
            self.assertEqual(resultset.total, 1)
コード例 #19
0
    def test_GeoBoundingBoxFilter(self):
        # Bounding box given as lat/lon dicts through the location_tl /
        # location_br keywords: exactly one document matches.
        gq = GeoBoundingBoxFilter("pin.location",
                                  location_tl={"lat" : 40.717, "lon" : 70.99},
                                  location_br={"lat" : 40.03, "lon" : 72.0})
        q = FilteredQuery(MatchAllQuery(), gq)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(resultset.total, 1)

        # Bounding box given positionally as two-element lists: again
        # exactly one document matches.
        gq = GeoBoundingBoxFilter("pin.location", [70.99, 40.717], [74.1, 40.03])
        q = FilteredQuery(MatchAllQuery(), gq)
        result2 = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEqual(result2.total, 1)
コード例 #20
0
def get_top_authors():
    """Return the author objects for the terms of the 'author' facet.

    A match-all search with a term facet on 'author' is run against
    ELASTICSEARCH_INDEX for doc type 'post'; every facet term is passed to
    ``get_author(id=...)`` and the results are returned as a list, in the
    order the facet lists them.
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('author')
    connection = get_connection()
    author_terms = connection.search(search, ELASTICSEARCH_INDEX, 'post').facets['author']['terms']
    return [get_author(id=entry['term']) for entry in author_terms]
コード例 #21
0
ファイル: test_aggs.py プロジェクト: ttimasdf/pyes
    def test_reverse_nested_agg(self):
        # A ReverseNestedAgg is attached as a sub-aggregation of a NestedAgg
        # on path 'resellers': 3 hits, nested doc_count 4, reverse doc_count 2.
        reverse_agg = ReverseNestedAgg(name='reverse', field='id')
        nested_agg = NestedAgg(name='nested', path='resellers', sub_aggs=[reverse_agg])
        search = MatchAllQuery().search()
        search.agg.add(nested_agg)

        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)

        nested_result = resultset.aggs.nested
        self.assertEqual(nested_result['doc_count'], 4)
        self.assertEqual(nested_result.reverse, {u'doc_count': 2})
コード例 #22
0
ファイル: test_aggs.py プロジェクト: idon2020/pyes
    def test_reverse_nested_agg(self):
        # A ReverseNestedAgg is attached as a sub-aggregation of a NestedAgg
        # on path "resellers": 3 hits, nested doc_count 4, reverse doc_count 2.
        reverse_agg = ReverseNestedAgg(name="reverse", field="id")
        nested_agg = NestedAgg(name="nested", path="resellers", sub_aggs=[reverse_agg])
        search = MatchAllQuery().search()
        search.agg.add(nested_agg)

        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)

        nested_result = resultset.aggs.nested
        self.assertEqual(nested_result["doc_count"], 4)
        self.assertEqual(nested_result.reverse, {u"doc_count": 2})
コード例 #23
0
ファイル: test_facets.py プロジェクト: zebuline/pyes
 def test_date_facet_filter(self):
     # Keep only documents whose 'date' lies in [2011-04-01, 2011-05-01)
     # (upper bound excluded), then build a monthly date histogram:
     # 2 hits, all in the bucket starting at 1301616000000 ms.
     april = ESRange('date',
         datetime.date(2011, 4, 1),
         datetime.date(2011, 5, 1),
         include_upper=False)
     q = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april))
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
         field='date',
         interval='month'))
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'], [{u'count': 2, u'time': 1301616000000}])
コード例 #24
0
ファイル: facets.py プロジェクト: crankycoder/zamboni-lib
 def test_terms_facet_filter(self):
     # Restrict the search to tag == 'foo' with a TermFilter, then facet on
     # 'tag'.  The result is read as a plain mapping here: 2 hits and a
     # single facet entry for 'foo'.
     q = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     q.facet.add_term_facet('tag')
     result = self.conn.search(query=q,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 2)
     self.assertEqual(result['facets']['tag']['terms'], [{
         u'count': 2,
         u'term': u'foo'
     }])
コード例 #25
0
ファイル: test_facets.py プロジェクト: akkumar/pyes
 def test_date_facet_filter(self):
     # Keep only documents whose 'date' lies in [2011-04-01, 2011-05-01)
     # (upper bound excluded), then build a monthly date histogram:
     # 2 hits, all in the bucket starting at 1301616000000 ms.
     april = ESRange('date',
         datetime.date(2011, 4, 1),
         datetime.date(2011, 5, 1),
         include_upper=False)
     q = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april))
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
         field='date',
         interval='month'))
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'], [{u'count': 2, u'time': 1301616000000}])
コード例 #26
0
ファイル: test_facets.py プロジェクト: Naeka/pyes
 def test_date_facet_filter(self):
     # Keep only documents whose "date" lies in [2011-04-01, 2011-05-01)
     # (upper bound excluded), then build a monthly date histogram:
     # 2 hits, all in the bucket starting at 1301616000000 ms.
     april = ESRange("date", datetime.date(2011, 4, 1), datetime.date(2011, 5, 1), include_upper=False)
     q = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april))
     q = q.search()
     q.facet.facets.append(DateHistogramFacet("date_facet", field="date", interval="month"))
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets["date_facet"]["entries"], [{u"count": 2, u"time": 1301616000000}])
コード例 #27
0
ファイル: facets.py プロジェクト: AndryulE/kitsune
 def test_date_facet(self):
     # Monthly date histogram on 'date' over every document in
     # test-index/test-type: 3 hits split into buckets of 2 and 1.
     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
                                              field='date',
                                              interval='month'))
     result = self.conn.search(query=q, indexes=["test-index"], doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'], [{u'count': 2, u'time': 1301616000000},
                                                                  {u'count': 1, u'time': 1304208000000}])
     # The bucket keys are millisecond timestamps.  They are decoded as UTC
     # so the expected dates do not depend on the machine's timezone
     # (fromtimestamp() would yield the previous day west of UTC).
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000/1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000/1000.).date(),
                      datetime.date(2011, 5, 1))
コード例 #28
0
ファイル: test_facets.py プロジェクト: akkumar/pyes
 def test_date_facet(self):
     # Monthly date histogram on 'date' over every document:
     # 3 hits split into buckets of 2 and 1.
     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
         field='date',
         interval='month'))
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries, [{u'count': 2, u'time': 1301616000000},
             {u'count': 1, u'time': 1304208000000}])
     # The bucket keys are millisecond timestamps, decoded as UTC.  Month and
     # day are written without leading zeros: 04/01 are Python 2 octal
     # literals and a syntax error on Python 3.
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
         datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
         datetime.date(2011, 5, 1))
コード例 #29
0
ファイル: test_facets.py プロジェクト: zebuline/pyes
 def test_date_facet(self):
     # Monthly date histogram on 'date' over every document:
     # 3 hits split into buckets of 2 and 1.
     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
         field='date',
         interval='month'))
     resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries, [{u'count': 2, u'time': 1301616000000},
             {u'count': 1, u'time': 1304208000000}])
     # The bucket keys are millisecond timestamps.  They are decoded as UTC
     # so the expected dates do not depend on the machine's timezone
     # (fromtimestamp() would yield the previous day west of UTC).
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
         datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
         datetime.date(2011, 5, 1))
コード例 #30
0
    def test_GeoPolygonFilter(self):
        # The same three-point polygon on pin.location is given twice: as
        # lat/lon dicts and as two-element lists (presumably [lon, lat];
        # confirm against pyes).  Each form must match exactly one document.
        polygons = (
            [{"lat" : 50, "lon" :-30},
             {"lat" : 30, "lon" :-80},
             {"lat" : 80, "lon" :-90}],
            [[ -30, 50],
             [ -80, 30],
             [ -90, 80]],
        )
        for points in polygons:
            gq = GeoPolygonFilter("pin.location", points)
            q = FilteredQuery(MatchAllQuery(), gq)
            resultset = self.conn.search(query=q, indices=["test-mindex"])
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(resultset.total, 1)
コード例 #31
0
 def create_query(self, sql):
     # Translate the `sql` mapping into a FilteredQuery over MatchAllQuery.
     # Only the keys 'termquery', 'geodistancefilter' and 'optype' are read;
     # anything else in `sql` is ignored.  Filters are accumulated on
     # self._filters and the fields they touch on self._query_field.
     present = sql.keys()

     if "termquery" in present:
         # Each entry is a mapping; its first key is the field name and the
         # value under that key is the term to filter on.
         for term in sql['termquery']:
             field = term.keys()[0]
             self._query_field.append(field)
             self._filters.append(TermFilter(field, term[field]))

     if "geodistancefilter" in present:
         geoterm = sql['geodistancefilter']
         geofield = geoterm['field']
         distance_filter = GeoDistanceFilter(geofield,
                                             geoterm['fieldvalue'],
                                             geoterm['distance'],
                                             'arc', 'km')
         self._filters.append(distance_filter)
         self._query_field.append(geofield)

     if "optype" not in present:
         # No explicit operator: the filters are AND-ed.
         self._filtertype = ANDFilter(self._filters)
     else:
         # An operator other than 'and'/'or' leaves self._filtertype untouched.
         operator = sql['optype'].lower()
         if operator == 'and':
             self._filtertype = ANDFilter(self._filters)
         elif operator == 'or':
             self._filtertype = ORFilter(self._filters)

     return FilteredQuery(MatchAllQuery(), self._filtertype)
コード例 #32
0
ファイル: query.py プロジェクト: zldww2011/binarypig
def get_query(s):
    """Turn a search string into a pyes query object.

    ``name:value`` tokens (the value may be double-quoted) each become a
    ``WildcardQuery`` when the value ends in ``*`` and a ``TermQuery``
    otherwise.  Whatever text is left over is treated as free text: a lone
    ``*`` becomes ``MatchAllQuery``, anything else a ``TextQuery`` on ``_all``
    with ``operator='and'``.

    Returns the single query when exactly one was produced, otherwise a
    ``BoolQuery`` with every produced query added as a must clause.
    """
    import re
    field_pattern = re.compile(r'(?P<name>\S+):(?P<value>"[^"]+"|\S+)\s*')

    clauses = []
    leftovers = []
    cursor = 0
    for match in field_pattern.finditer(s):
        # Text between the previous token and this one counts as free text.
        leftovers.append(s[cursor:match.start()])
        cursor = match.end()
        name = match.group('name')
        value = match.group('value').strip('"')
        query_cls = WildcardQuery if value.endswith("*") else TermQuery
        clauses.append(query_cls(field=name, value=value))
    leftovers.append(s[cursor:])

    freetext = "".join(leftovers).strip()
    if freetext == '*':
        clauses.append(MatchAllQuery())
    elif freetext:
        clauses.append(TextQuery("_all", freetext, operator='and'))

    if len(clauses) == 1:
        return clauses[0]

    combined = BoolQuery()
    for clause in clauses:
        combined.add_must(clause)
    return combined
コード例 #33
0
ファイル: test_resultset.py プロジェクト: glitchdotcom/pyes
    def test_iterator_offset(self):
        # Ask for a block of 10 starting at position 10, sorted by ascending
        # 'position'.  start/size are passed both on the Search object and
        # on the conn.search call.
        paged = Search(MatchAllQuery(),
                       start=10,
                       size=10,
                       sort={'position': {'order': 'asc'}})
        resultset = self.conn.search(paged,
                                     self.index_name,
                                     self.document_type,
                                     start=10,
                                     size=10)

        # len() reports 1000 results.
        self.assertEqual(len(resultset), 1000)

        # Iteration must yield records 10, 11, ... rather than 0, 1, ...
        for offset, record in enumerate(resultset):
            self.assertEqual(record.position, offset + 10)
コード例 #34
0
 def test_sorting_by_geolocation(self):
     # Sort every document with a GeoSortOrder on 'location' at lat=1, lon=1;
     # the hits must come back with ids '1', '2', '3' in that order.
     geo_sorted = Search(MatchAllQuery())
     geo_sorted.sort.add(GeoSortOrder(field='location', lat=1, lon=1))
     hits = self.conn.search(geo_sorted,
                             indices=self.index_name,
                             doc_types=[self.document_type]).hits
     self.assertEqual([hit['_id'] for hit in hits], ['1', '2', '3'])
コード例 #35
0
 def test_sorting_by_script(self):
     # Sort every document by the numeric script 1.0/doc['foo'].value;
     # the hits must come back with ids '3', '2', '1' in that order.
     script_sorted = Search(MatchAllQuery())
     script_sorted.sort.add(ScriptSortOrder("1.0/doc['foo'].value", type='number'))
     hits = self.conn.search(script_sorted,
                             indices=self.index_name,
                             doc_types=[self.document_type]).hits
     self.assertEqual([hit['_id'] for hit in hits], ['3', '2', '1'])
コード例 #36
0
 def test_sorting_by_foo(self):
     # Sort every document by 'foo' descending;
     # the hits must come back with ids '3', '2', '1' in that order.
     foo_sorted = Search(MatchAllQuery())
     foo_sorted.sort.add(SortOrder('foo', order='desc'))
     hits = self.conn.search(foo_sorted,
                             indices=self.index_name,
                             doc_types=[self.document_type]).hits
     self.assertEqual([hit['_id'] for hit in hits], ['3', '2', '1'])
コード例 #37
0
 def records(self):
     # Wrap a scan-type match-all search on the stream's connection in an
     # ESRecordIterator (constructed with self.expand).
     # Raises RuntimeError when no connection has been set up.
     if not self.connection:
         raise RuntimeError("Stream is not initialized")

     from pyes.query import MatchAllQuery
     scan_options = dict(search_type="scan", timeout="5m", size="200")
     hits = self.connection.search(MatchAllQuery(), **scan_options)
     return ESRecordIterator(hits, self.expand)
コード例 #38
0
ファイル: dumprestore.py プロジェクト: danfairs/esman
def dump_docs(fp, conn, index_name, doc_type, scroll='5m', encoding='utf8'):
    """Write every document of one index/doc type to ``fp`` as JSON lines.

    fp         -- writable file-like object; receives one JSON document per line.
    conn       -- connection whose ``search`` runs the scan.
    index_name -- index to read.
    doc_type   -- document type to read.
    scroll     -- ``scroll`` value forwarded to ``conn.search``.
    encoding   -- ``encoding`` argument forwarded to ``json.dumps``.
    """
    hits = conn.search(MatchAllQuery(),
                       indices=[index_name],
                       doc_types=[doc_type],
                       scan=True,
                       scroll=scroll)
    for hit in hits:
        line = json.dumps(hit, encoding=encoding)
        fp.write(line)
        fp.write('\n')
コード例 #39
0
 def rows(self):
     # Wrap a scan-type match-all search on the stream's connection in an
     # ESRowIterator (constructed with self.field_names).
     # Raises RuntimeError when no connection has been set up.
     if not self.connection:
         raise RuntimeError("Stream is not initialized")

     from pyes.query import MatchAllQuery
     column_names = self.field_names
     scan_options = dict(search_type="scan", timeout="5m", size="200")
     hits = self.connection.search(MatchAllQuery(), **scan_options)
     return ESRowIterator(hits, column_names)
コード例 #40
0
ファイル: test_nested.py プロジェクト: trb116/pythonanalyzer
    def test_nested_filter(self):
        # Compare plain TermFilters with NestedFilters on the 'nested1' path.

        def total_for(es_filter):
            # Number of hits when every document is passed through es_filter.
            query = FilteredQuery(MatchAllQuery(), es_filter)
            found = self.conn.search(query=query,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
            return found.total

        # The value is found through '_all' ...
        self.assertEqual(total_for(TermFilter('_all', 'n_value1_1')), 2)

        # ... but not through a plain TermFilter on the nested field name
        # (the original test performs this check twice, and so does this).
        self.assertEqual(
            total_for(TermFilter('nested1.n_field1', 'n_value1_1')), 0)
        self.assertEqual(
            total_for(TermFilter('nested1.n_field1', 'n_value1_1')), 0)

        # Wrapped in a NestedFilter on 'nested1' the same term matches 2 docs.
        one_term = BoolQuery(must=[TermQuery('nested1.n_field1', 'n_value1_1')])
        self.assertEqual(total_for(NestedFilter('nested1', one_term)), 2)

        # Requiring a second nested term as well narrows it to 1 doc.
        two_terms = BoolQuery(must=[
            TermQuery('nested1.n_field1', 'n_value1_1'),
            TermQuery('nested1.n_field2', 'n_value2_1')
        ])
        self.assertEqual(total_for(NestedFilter('nested1', two_terms)), 1)
コード例 #41
0
ファイル: query.py プロジェクト: zldww2011/binarypig
def add_filters(filters):
    """Build a query that applies every ``(field, term)`` pair as a term filter.

    filters -- iterable of ``(field, term)`` pairs.

    Returns a plain ``MatchAllQuery`` when ``filters`` is empty, otherwise a
    ``FilteredQuery`` over ``MatchAllQuery`` whose filter is the AND of one
    ``TermFilter`` per pair.
    """
    flist = [TermFilter(field, term) for field, term in filters]
    if not flist:
        return MatchAllQuery()

    # The original wrapped the not-yet-assigned local `q` here, which raised
    # UnboundLocalError whenever any filter was supplied; the base query is
    # the same match-all used for the empty case.
    return FilteredQuery(MatchAllQuery(), ANDFilter(flist))
コード例 #42
0
ファイル: facets.py プロジェクト: crankycoder/zamboni-lib
 def test_date_facet(self):
     # Monthly date histogram on 'date' over every document in
     # test-index/test-type: 3 hits split into buckets of 2 and 1.
     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     result = self.conn.search(query=q,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'],
                      [{
                          u'count': 2,
                          u'time': 1301616000000
                      }, {
                          u'count': 1,
                          u'time': 1304208000000
                      }])
     # The bucket keys are millisecond timestamps.  They are decoded as UTC
     # so the expected dates do not depend on the machine's timezone
     # (fromtimestamp() would yield the previous day west of UTC).
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
         datetime.date(2011, 4, 1))
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
         datetime.date(2011, 5, 1))
コード例 #43
0
def dump_topics(backupdir,
                eshost,
                _type,
                indices="topics-all"):
    """Back up every document of one doc type to a gzipped JSON-lines file.

    backupdir -- directory that receives ``topics.<_type>.json.gz``.
    eshost    -- address handed to ``ES``.
    _type     -- document type to export; also part of the backup file name.
    indices   -- index (or indices) to scan.

    Each document gets its ``_id`` filled in from ``_meta.id`` before being
    written, one JSON object per line, to /tmp/out.json; that file is then
    compressed into the backup.  Returns None; progress is reported on
    stderr.  When there are no documents nothing is written.
    """
    tmp_path = '/tmp/out.json'

    conn = ES(eshost)
    q = MatchAllQuery()
    q = q.search()

    resultset = conn.search(query=q,indices=indices, doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data\n")
        return

    # The scratch file is opened only once there is data, so the early return
    # above can no longer leak an open handle.
    sys.stderr.write("Will write %d lines to %s\n"%(resultset.total, tmp_path))
    with open(tmp_path,'w') as out:
        for r in resultset:
            r['_id'] = r._meta.id
            out.write(json.dumps(r)+'\n')

    # gzip
    backup = os.path.join(backupdir,"topics.{}.json.gz".format(_type))

    # Both handles are closed even if compression fails (the original closed
    # the gzip handle twice and never closed the input).
    with open(tmp_path,'rb') as f_in:
        f_out = gzip.open(backup,'wb')
        try:
            f_out.writelines(f_in)
        finally:
            f_out.close()
    sys.stderr.write("Created %s\n"%backup)
コード例 #44
0
ファイル: test_facets.py プロジェクト: Naeka/pyes
    def test_terms_facet(self):
        def facet_search(field):
            # Match-all search carrying a single term facet on `field`.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(field)
            return search

        # Facet on "tag" over every document: 3 hits, "foo" twice, "bar" once.
        q = facet_search("tag")
        resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms, [{u"count": 2, u"term": u"foo"}, {u"count": 1, u"term": u"bar"}])

        # Two independently built but identical searches compare equal ...
        q2 = facet_search("tag")
        q3 = facet_search("tag")
        self.assertEqual(q2, q3)

        # ... and a search faceting on a different field does not.
        q4 = facet_search("bag")
        self.assertNotEqual(q2, q4)
コード例 #45
0
ファイル: test_facets.py プロジェクト: akkumar/pyes
    def test_terms_facet_filter(self):
        def filtered_facet_search(facet_field):
            # Search restricted to tag == 'foo' with one term facet on `facet_field`.
            search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
            search.facet.add_term_facet(facet_field)
            return search

        # With the filter in place only 'foo' documents remain: 2 hits and a
        # single facet entry, readable by key and by attribute access.
        q = filtered_facet_search('tag')
        resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 2)
        self.assertEqual(resultset.facets['tag']['terms'], [{u'count': 2, u'term': u'foo'}])
        self.assertEqual(resultset.facets.tag.terms, [{u'count': 2, u'term': u'foo'}])

        # Two independently built but identical searches compare equal ...
        q2 = filtered_facet_search('tag')
        q3 = filtered_facet_search('tag')
        self.assertEqual(q2, q3)

        # ... and a search faceting on a different field does not.
        q4 = filtered_facet_search('bag')
        self.assertNotEqual(q3, q4)
コード例 #46
0
ファイル: test_facets.py プロジェクト: akkumar/pyes
 def test_facet_filter_is_serialized_correctly(self):
     # A facet_filter given to a TermFacet must show up in the serialized
     # search under facets -> topic -> facet_filter -> bool.
     not_reviewed = BoolFilter(must_not=TermQuery(field='reviewed', value=True))
     topic_facet = TermFacet(field='topic', facet_filter=not_reviewed)
     search = MatchAllQuery().search(size=0)
     search.facet.add(topic_facet)
     body = search.serialize()
     self.assertTrue(body['facets']['topic']['facet_filter']['bool'])
コード例 #47
0
ファイル: test_facets.py プロジェクト: Naeka/pyes
    def test_terms_facet_filter(self):
        def filtered_facet_search(facet_field):
            # Search restricted to tag == "foo" with one term facet on `facet_field`.
            search = FilteredQuery(MatchAllQuery(), TermFilter("tag", "foo")).search()
            search.facet.add_term_facet(facet_field)
            return search

        # With the filter in place only "foo" documents remain: 2 hits and a
        # single facet entry, readable by key and by attribute access.
        q = filtered_facet_search("tag")
        resultset = self.conn.search(query=q, indices=self.index_name, doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 2)
        self.assertEqual(resultset.facets["tag"]["terms"], [{u"count": 2, u"term": u"foo"}])
        self.assertEqual(resultset.facets.tag.terms, [{u"count": 2, u"term": u"foo"}])

        # Two independently built but identical searches compare equal ...
        q2 = filtered_facet_search("tag")
        q3 = filtered_facet_search("tag")
        self.assertEqual(q2, q3)

        # ... and a search faceting on a different field does not.
        q4 = filtered_facet_search("bag")
        self.assertNotEqual(q3, q4)
コード例 #48
0
import json
from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Base query: a MatchAllQuery, with no further restriction added here.
match_all = MatchAllQuery()

# Inner terms aggregation on the domain id field.
sub_domain_agg = TermsAgg(
    'domain_agg',
    field='json_data.etp_domain_id',
    size=20000,
)

# Outer terms aggregation on the client id field; the domain aggregation
# is attached to it as a sub-aggregation.
client_agg = TermsAgg(
    'client_agg',
    field='json_data.etp_client_id',
    sub_aggs=[sub_domain_agg],
    size=20000,
)

# NOTE(review): size=0 presumably suppresses document hits so that only
# the aggregation results are returned; confirm against the pyes docs.
search_query = Search(query=match_all, size=0)
search_query.agg.add(client_agg)

# Dump the serialized search as indented JSON for inspection.
print(
    json.dumps(
        search_query.serialize(),
        indent=2,
    )
)
コード例 #49
0
ファイル: test_facets.py プロジェクト: stevencdavis/pyes
    def test_terms_facet(self):
        # Term facet on 'tag' over an unfiltered match-all query: first
        # checks the result set returned by self.conn, then checks that
        # search objects compare by content.
        #
        # assertEqual / assertNotEqual are used instead of the
        # assertEquals / assertNotEquals aliases, which unittest deprecated
        # and Python 3.12 removed.

        def build_search(facet_field):
            # Match-all search carrying one term facet on ``facet_field``.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(facet_field)
            return search

        q = build_search('tag')
        resultset = self.conn.search(query=q,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms, [
            {u'count': 2, u'term': u'foo'},
            {u'count': 1, u'term': u'bar'},
        ])

        # Two searches built the same way must compare equal ...
        q2 = build_search('tag')
        q3 = build_search('tag')
        self.assertEqual(q2, q3)

        # ... while a search faceting on a different field must not.
        q4 = build_search('bag')
        self.assertNotEqual(q2, q4)
コード例 #50
0
ファイル: test_facets.py プロジェクト: stevencdavis/pyes
    def test_terms_facet_filter(self):
        # Term facet over a match-all query filtered to tag == 'foo':
        # first checks the result set returned by self.conn, then checks
        # that search objects compare by content.
        #
        # assertEqual / assertNotEqual are used instead of the
        # assertEquals / assertNotEquals aliases, which unittest deprecated
        # and Python 3.12 removed.

        def build_search(facet_field):
            # Match-all query restricted by a term filter on tag == 'foo',
            # with one term facet on ``facet_field``.
            query = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
            search = query.search()
            search.facet.add_term_facet(facet_field)
            return search

        q = build_search('tag')
        resultset = self.conn.search(query=q,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 2)

        # The same facet data is read through item access and through
        # attribute access; both must give the same terms.
        expected_terms = [{u'count': 2, u'term': u'foo'}]
        self.assertEqual(resultset.facets['tag']['terms'], expected_terms)
        self.assertEqual(resultset.facets.tag.terms, expected_terms)

        # Two searches built the same way must compare equal ...
        q2 = build_search('tag')
        q3 = build_search('tag')
        self.assertEqual(q2, q3)

        # ... while a search faceting on a different field must not.
        q4 = build_search('bag')
        self.assertNotEqual(q3, q4)