Ejemplo n.º 1
0
    def test_terms_facet_filter(self):
        """Term facet over a ``tag == 'foo'`` filtered search, plus search equality.

        Runs the filtered search and expects 2 hits with a single ``foo``
        facet bucket, reachable through both item and attribute access.
        Then checks that identically built searches compare equal and that
        a search faceting on a different field compares unequal.
        """

        def build_search(facet_field):
            # Every search in this test is built the same way; only the
            # facet field varies.
            query = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
            search = query.search()
            search.facet.add_term_facet(facet_field)
            return search

        expected_terms = [{u'count': 2, u'term': u'foo'}]
        resultset = self.conn.search(query=build_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 2)
        self.assertEqual(resultset.facets['tag']['terms'], expected_terms)
        self.assertEqual(resultset.facets.tag.terms, expected_terms)

        tag_search = build_search('tag')
        self.assertEqual(build_search('tag'), tag_search)
        self.assertNotEqual(tag_search, build_search('bag'))
Ejemplo n.º 2
0
    def test_terms_facet_filter(self):
        """Check a ``tag`` term facet on a search filtered to ``tag == "foo"``.

        The search must return 2 hits and one ``foo`` bucket with count 2
        (read via item access and via attribute access).  The second half
        verifies equality semantics of search objects: same filter and
        facet field compare equal, a different facet field does not.
        """
        search = FilteredQuery(MatchAllQuery(), TermFilter("tag", "foo")).search()
        search.facet.add_term_facet("tag")
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        foo_bucket = [{u"count": 2, u"term": u"foo"}]
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 2)
        self.assertEqual(resultset.facets["tag"]["terms"], foo_bucket)
        self.assertEqual(resultset.facets.tag.terms, foo_bucket)

        # Three independently built searches; only the last one facets on
        # a different field.
        built = []
        for facet_field in ("tag", "tag", "bag"):
            candidate = FilteredQuery(MatchAllQuery(), TermFilter("tag", "foo")).search()
            candidate.facet.add_term_facet(facet_field)
            built.append(candidate)
        first_tag, second_tag, bag = built
        self.assertEqual(first_tag, second_tag)
        self.assertNotEqual(second_tag, bag)
Ejemplo n.º 3
0
    def test_terms_facet(self):
        """Term facet on ``tag`` over a match-all search, plus search equality.

        Expects 3 hits with facet buckets ``foo`` (2) and ``bar`` (1), then
        checks that identically built searches compare equal while a search
        faceting on another field does not.
        """

        def faceted_search(field):
            # Match-all search carrying a single term facet on ``field``.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(field)
            return search

        resultset = self.conn.search(query=faceted_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms, [
            {u'count': 2, u'term': u'foo'},
            {u'count': 1, u'term': u'bar'},
        ])

        tag_search = faceted_search('tag')
        self.assertEqual(tag_search, faceted_search('tag'))
        self.assertNotEqual(tag_search, faceted_search('bag'))
Ejemplo n.º 4
0
    def test_terms_facet_filter(self):
        """Facet on ``tag`` while filtering the search to ``tag == 'foo'``.

        Asserts 2 hits and a single ``foo`` bucket of count 2 (through both
        dict-style and attribute-style access), then asserts that searches
        built from the same parts are equal and that changing the facet
        field breaks equality.
        """

        def filtered_search(facet_field):
            # Match-all query restricted by the 'tag' == 'foo' term filter,
            # with one term facet on ``facet_field``.
            base = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
            search = base.search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=filtered_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 2)
        self.assertEqual(resultset.facets['tag']['terms'], [{u'count': 2, u'term': u'foo'}])
        self.assertEqual(resultset.facets.tag.terms, [{u'count': 2, u'term': u'foo'}])

        same_a = filtered_search('tag')
        same_b = filtered_search('tag')
        self.assertEqual(same_a, same_b)
        self.assertNotEqual(same_b, filtered_search('bag'))
Ejemplo n.º 5
0
def dump(start, end, backupdir, eshost):
    """Export ``habakkuk`` documents in a date range to a gzipped JSON-lines file.

    Searches the ``habakkuk-*`` indices (scan mode) for documents whose
    ``created_at_date`` falls in the range ``start``..``end`` with the upper
    bound excluded (``include_upper=False``), writes one JSON document per
    line to ``/tmp/out.json`` and then compresses that file to
    ``<backupdir>/habakkuk-<YYYY-MM-DD of start>.json.gz``.

    Progress messages and the "no data" notice are written to stderr.
    Returns ``None``; when the search reports no hits nothing is written.

    :param start: lower bound of the range; also names the backup file.
    :param end: upper bound of the range (excluded).
    :param backupdir: directory that receives the ``.json.gz`` file.
    :param eshost: host string handed to ``ES``.
    """
    conn = ES(eshost)
    _type = 'habakkuk'
    q = MatchAllQuery()
    q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date', start, end, include_upper=False)))
    q = q.search()
    resultset = conn.search(query=q, indices=_type + "-*", doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data for %s - %s\n" % (start, end))
        return

    # The temp file is opened only once we know there is data, and inside a
    # ``with`` block, so neither the early return above nor an error while
    # dumping can leak the handle.
    tmp_path = '/tmp/out.json'
    with open(tmp_path, 'w') as out:
        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        try:
            # Explicit .next() calls are kept from the original; the loop
            # ends when the result set raises StopIteration.
            while True:
                out.write(json.dumps(resultset.next()) + '\n')
        except StopIteration:
            pass

    # gzip
    ext = datetime.strftime(start, '%Y-%m-%d')
    backup = os.path.join(backupdir, "habakkuk-%s.json.gz" % ext)

    with open(tmp_path, 'rb') as f_in:
        f_out = gzip.open(backup, 'wb')
        try:
            f_out.writelines(f_in)
        finally:
            # The original closed f_out twice and never closed f_in.
            f_out.close()
    sys.stderr.write("Created %s\n" % backup)
Ejemplo n.º 6
0
def term_facet(host='localhost:9200',
               terms=['bibleverse'],
               _type='habakkuk',
               date_filter=[],
               size=10):
    ret = []
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start,end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False)))

    q = q.search(size=0)
    for term in terms:
        q.facet.add_term_facet(term,order='count',size=size)
        
    print json.dumps(json.loads(q.to_search_json()),indent=2)

    resultset = conn.search(query=q, indices=_type+'-*', doc_types=[_type])
    for facet in resultset.facets:
        print "Total",facet,resultset.facets[facet]['total']
        for row in resultset.facets[facet]['terms']:
            print "\t",row['term'],row['count']
            ret.append((facet,row['term']))
        
    return ret
Ejemplo n.º 7
0
def facets(host='localhost:9200',
          facet_terms=('bibleverse',),
          _type='habakkuk',
          date_filter=(),
          size=10):
    """Return term-facet buckets from the ``<_type>-*`` indices.

    Builds a match-all search (optionally restricted to a
    ``created_at_date`` range), adds one count-ordered term facet per entry
    in ``facet_terms`` and runs it.  The serialized search is logged through
    ``es_logger`` and the JSON-encoded result through ``logger``.

    :param host: host string handed to ``ES``.
    :param facet_terms: iterable of field names to facet on.
    :param _type: document type; also the prefix of the index pattern.
    :param date_filter: empty for no filtering, otherwise a ``(start, end)``
        pair of objects providing ``isoformat()``; the upper bound is
        excluded (``include_upper=False``).
    :param size: ``size`` passed to every term facet.
    :returns: dict mapping each facet name to a list of
        ``{"value": term, "count": count}`` dicts.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; callers may still pass lists.
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        date_range = ESRange('created_at_date',
                             start.isoformat(),
                             end.isoformat(),
                             include_upper=False)
        q = FilteredQuery(q, RangeFilter(qrange=date_range))

    # size=0 on the search itself; only the facet output is read below.
    q = q.search(size=0)
    for term in facet_terms:
        q.facet.add_term_facet(term, order='count', size=size)

    es_logger.info(q.serialize())

    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    ret = {}
    for facet in resultset.facets:
        ret[facet] = [{"value": row['term'], "count": row['count']}
                      for row in resultset.facets[facet]['terms']]

    logger.debug("facets return|'%s'"%json.dumps(ret))
    return ret
Ejemplo n.º 8
0
 def test_terms_facet(self):
     """Term facet on ``tag`` over a match-all search of ``test-index``.

     Expects 3 hits and facet buckets ``foo`` (2) and ``bar`` (1), read
     from the result with dict-style access.
     """
     search = MatchAllQuery().search()
     search.facet.add_term_facet('tag')
     result = self.conn.search(query=search, indexes=["test-index"], doc_types=["test-type"])
     expected_terms = [
         {u'count': 2, u'term': u'foo'},
         {u'count': 1, u'term': u'bar'},
     ]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['tag']['terms'], expected_terms)
Ejemplo n.º 9
0
 def test_terms_facet_filter(self):
     """Term facet on ``tag`` for a search filtered to ``tag == 'foo'``.

     Expects 2 hits in ``test-index`` and a single ``foo`` bucket of
     count 2.
     """
     search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     search.facet.add_term_facet('tag')
     resultset = self.conn.search(query=search, indices=["test-index"], doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['tag']['terms'], [{u'count': 2, u'term': u'foo'}])
Ejemplo n.º 10
0
 def test_nested_agg(self):
     """A nested aggregation on path ``resellers`` reports ``doc_count`` 4.

     The match-all search itself is expected to return 3 hits.
     """
     search = MatchAllQuery().search()
     search.agg.add(NestedAgg(name="nested", path="resellers"))
     resultset = self.conn.search(
         query=search,
         indices=self.index_name,
         doc_types=[self.document_type],
     )
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u"doc_count": 4})
Ejemplo n.º 11
0
 def test_nested_agg(self):
     """Aggregate over the nested ``resellers`` path of a match-all search.

     Expects 3 top-level hits and a nested ``doc_count`` of 4.
     """
     resellers_agg = NestedAgg(name='nested', path='resellers')
     search = MatchAllQuery().search()
     search.agg.add(resellers_agg)
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u'doc_count': 4})
Ejemplo n.º 12
0
 def test_terms_facet(self):
     """Term facet on ``tag`` over a match-all search.

     Expects 3 hits and facet buckets ``foo`` (2) and ``bar`` (1), read
     through attribute access on the result set.
     """
     search = MatchAllQuery().search()
     search.facet.add_term_facet('tag')
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.tag.terms, [
         {u'count': 2, u'term': u'foo'},
         {u'count': 1, u'term': u'bar'},
     ])
Ejemplo n.º 13
0
    def test_max_agg(self):
        """A max aggregation on ``position`` yields 3 across the 3 hits."""
        search = MatchAllQuery().search()
        search.agg.add(MaxAgg(name="max", field="position"))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u"value": 3})
Ejemplo n.º 14
0
    def test_missing_agg(self):
        """A missing aggregation on ``integer`` counts 1 document out of 3 hits."""
        search = MatchAllQuery().search()
        search.agg.add(MissingAgg(name='missing', field='integer'))
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u'doc_count': 1})
Ejemplo n.º 15
0
    def test_max_agg(self):
        """Check the ``max`` aggregation of field ``position``.

        The match-all search should return 3 hits and the aggregation
        should report a value of 3.
        """
        position_max = MaxAgg(name='max', field='position')
        search = MatchAllQuery().search()
        search.agg.add(position_max)
        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u'value': 3})
Ejemplo n.º 16
0
    def test_missing_agg(self):
        """Check the ``missing`` aggregation of field ``integer``.

        The match-all search should return 3 hits and the aggregation
        should report a ``doc_count`` of 1.
        """
        integer_missing = MissingAgg(name="missing", field="integer")
        search = MatchAllQuery().search()
        search.agg.add(integer_missing)
        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u"doc_count": 1})
Ejemplo n.º 17
0
    def test_min_agg(self):
        """A min aggregation on ``position`` yields 1 across the 3 hits."""
        search = MatchAllQuery().search()
        min_agg = MinAgg(name="min", field="position")
        search.agg.add(min_agg)
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.min, {u'value': 1})
def get_top_authors():
    """Return the authors named by the ``author`` term facet over posts.

    Runs a match-all search with a term facet on ``author`` against
    ``ELASTICSEARCH_INDEX`` (doc type ``post``) and resolves each facet
    term through ``get_author(id=...)``, preserving the facet order.
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('author')
    facets = get_connection().search(search, ELASTICSEARCH_INDEX, 'post').facets
    return [get_author(id=bucket['term']) for bucket in facets['author']['terms']]
Ejemplo n.º 19
0
    def test_terms_facet(self):
        """Check a ``tag`` term facet on a match-all search and search equality.

        The search must return 3 hits with buckets ``foo`` (2) and ``bar``
        (1).  Afterwards, two searches faceting on ``tag`` must compare
        equal and a search faceting on ``bag`` must compare unequal.
        """
        search = MatchAllQuery().search()
        search.facet.add_term_facet("tag")
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases.
        self.assertEqual(resultset.total, 3)
        self.assertEqual(
            resultset.facets.tag.terms,
            [{u"count": 2, u"term": u"foo"}, {u"count": 1, u"term": u"bar"}],
        )

        # Three independently built searches; only the last one facets on
        # a different field.
        built = []
        for facet_field in ("tag", "tag", "bag"):
            candidate = MatchAllQuery().search()
            candidate.facet.add_term_facet(facet_field)
            built.append(candidate)
        first_tag, second_tag, bag = built
        self.assertEqual(first_tag, second_tag)
        self.assertNotEqual(first_tag, bag)
Ejemplo n.º 20
0
    def test_reverse_nested_agg(self):
        """Reverse-nested sub-aggregation inside a nested ``resellers`` agg.

        Expects 3 hits, a nested ``doc_count`` of 4, and a ``reverse``
        sub-aggregation reporting ``doc_count`` 2.
        """
        search = MatchAllQuery().search()
        search.agg.add(
            NestedAgg(
                name='nested',
                path='resellers',
                sub_aggs=[ReverseNestedAgg(name='reverse', field='id')],
            )
        )
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )
        self.assertEqual(resultset.total, 3)

        nested_result = resultset.aggs.nested
        self.assertEqual(nested_result['doc_count'], 4)
        self.assertEqual(nested_result.reverse, {u'doc_count': 2})
Ejemplo n.º 21
0
    def test_reverse_nested_agg(self):
        """Nest a reverse-nested aggregation under the ``resellers`` path.

        The match-all search should return 3 hits; the nested aggregation
        should count 4 documents and its ``reverse`` child 2.
        """
        search = MatchAllQuery().search()
        reverse_agg = ReverseNestedAgg(name="reverse", field="id")
        search.agg.add(NestedAgg(name="nested", path="resellers", sub_aggs=[reverse_agg]))

        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.nested["doc_count"], 4)
        self.assertEqual(resultset.aggs.nested.reverse, {u"doc_count": 2})
Ejemplo n.º 22
0
 def test_date_facet_filter(self):
     """Monthly date histogram over a search range-filtered on ``date``.

     The range runs from 2011-04-01 to 2011-05-01 with the upper bound
     excluded (``include_upper=False``); the test expects 2 hits in a
     single histogram entry.
     """
     april = ESRange('date',
                     datetime.date(2011, 4, 1),
                     datetime.date(2011, 5, 1),
                     include_upper=False)
     search = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april)).search()
     search.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000}])
Ejemplo n.º 23
0
 def test_terms_facet_filter(self):
     """Term facet on ``tag`` for a search filtered to ``tag == 'foo'``.

     Expects 2 hits in ``test-index`` and a single ``foo`` bucket of
     count 2, read from the result with dict-style access.
     """
     search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     search.facet.add_term_facet('tag')
     result = self.conn.search(query=search,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 2)
     self.assertEqual(result['facets']['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
Ejemplo n.º 24
0
 def test_date_facet_filter(self):
     """Date histogram (monthly) on a search restricted to a ``date`` range.

     The range filter spans 2011-04-01 to 2011-05-01 with
     ``include_upper=False``.  Expects 2 hits and one histogram entry of
     count 2.
     """
     date_range = ESRange(
         "date",
         datetime.date(2011, 4, 1),
         datetime.date(2011, 5, 1),
         include_upper=False,
     )
     search = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=date_range)).search()
     histogram = DateHistogramFacet("date_facet", field="date", interval="month")
     search.facet.facets.append(histogram)
     resultset = self.conn.search(query=search, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets["date_facet"]["entries"], [{u"count": 2, u"time": 1301616000000}])
Ejemplo n.º 25
0
 def test_date_facet_filter(self):
     """Check a monthly ``date`` histogram under a date range filter.

     Documents are filtered to ``date`` between 2011-04-01 and 2011-05-01
     (``include_upper=False``); 2 hits and a single entry with count 2
     are expected.
     """
     lower = datetime.date(2011, 4, 1)
     upper = datetime.date(2011, 5, 1)
     range_filter = RangeFilter(qrange=ESRange('date', lower, upper, include_upper=False))
     search = FilteredQuery(MatchAllQuery(), range_filter).search()
     search.facet.facets.append(DateHistogramFacet('date_facet',
                                                   field='date',
                                                   interval='month'))
     resultset = self.conn.search(query=search, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'], [{u'count': 2, u'time': 1301616000000}])
Ejemplo n.º 26
0
 def test_date_facet(self):
     """Monthly date histogram on ``date`` over a match-all search.

     Expects 3 hits in ``test-index`` and two histogram entries (counts 2
     and 1), and checks that the entries' ``time`` values decode to
     2011-04-01 and 2011-05-01.
     """
     q = MatchAllQuery().search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
                                              field='date',
                                              interval='month'))
     result = self.conn.search(query=q, indexes=["test-index"], doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
     # The ``time`` values are epoch milliseconds.  Decode them in UTC:
     # fromtimestamp() uses the local timezone and lands on the previous
     # day on machines west of UTC.  Month/day literals are written
     # without leading zeros (``04`` is octal in Python 2 and a syntax
     # error in Python 3).
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
                      datetime.date(2011, 5, 1))
Ejemplo n.º 27
0
 def test_date_facet(self):
     """Monthly date histogram on ``date`` over a match-all search.

     Expects 3 hits and two histogram entries (counts 2 and 1), and
     checks that the entries' ``time`` values, decoded in UTC, are
     2011-04-01 and 2011-05-01.
     """
     search = MatchAllQuery().search()
     search.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
     # Month/day literals are written without leading zeros: ``04`` is an
     # octal literal in Python 2 and a syntax error in Python 3.
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
                      datetime.date(2011, 5, 1))
Ejemplo n.º 28
0
 def test_date_facet(self):
     """Check a monthly ``date`` histogram facet on a match-all search.

     The search should return 3 hits and two histogram entries with
     counts 2 and 1 whose ``time`` values correspond to 2011-04-01 and
     2011-05-01.
     """
     histogram = DateHistogramFacet('date_facet', field='date', interval='month')
     search = MatchAllQuery().search()
     search.facet.facets.append(histogram)
     resultset = self.conn.search(query=search, indices=self.index_name, doc_types=[self.document_type])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries, [
         {u'count': 2, u'time': 1301616000000},
         {u'count': 1, u'time': 1304208000000},
     ])
     # The ``time`` values are epoch milliseconds.  They are decoded with
     # utcfromtimestamp() because fromtimestamp() applies the local
     # timezone and yields the previous day west of UTC.  The date
     # literals drop the leading zeros (octal in Python 2, invalid in
     # Python 3).
     expected_days = (
         (1301616000000, datetime.date(2011, 4, 1)),
         (1304208000000, datetime.date(2011, 5, 1)),
     )
     for millis, day in expected_days:
         self.assertEqual(datetime.datetime.utcfromtimestamp(millis / 1000.).date(), day)
Ejemplo n.º 29
0
 def test_date_facet(self):
     """Monthly date histogram on ``date`` over a match-all search.

     Expects 3 hits in ``test-index`` and two histogram entries (counts 2
     and 1), and checks that the entries' ``time`` values decode to
     2011-04-01 and 2011-05-01.
     """
     q = MatchAllQuery().search()
     q.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     result = self.conn.search(query=q,
                               indexes=["test-index"],
                               doc_types=["test-type"])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])
     # The ``time`` values are epoch milliseconds.  Decode them in UTC so
     # the check does not depend on the local timezone (fromtimestamp()
     # gives the previous day west of UTC), and write the date literals
     # without leading zeros (octal in Python 2, invalid in Python 3).
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
         datetime.date(2011, 4, 1))
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
         datetime.date(2011, 5, 1))
Ejemplo n.º 30
0
def dump_topics(backupdir,
                eshost,
                _type,
                indices="topics-all"):
    """Export every document of ``_type`` to a gzipped JSON-lines file.

    Runs a match-all scan search against ``indices``, writes one JSON
    document per line to ``/tmp/out.json`` (each document gets an ``_id``
    key copied from its ``_meta.id``) and then compresses that file to
    ``<backupdir>/topics.<_type>.json.gz``.

    Progress messages and the "no data" notice are written to stderr.
    Returns ``None``; when the search reports no hits nothing is written.

    :param backupdir: directory that receives the ``.json.gz`` file.
    :param eshost: host string handed to ``ES``.
    :param _type: document type to export; also part of the backup name.
    :param indices: index name(s) passed to the search.
    """
    conn = ES(eshost)
    q = MatchAllQuery()
    q = q.search()

    resultset = conn.search(query=q, indices=indices, doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data\n")
        return

    # The temp file is opened only once we know there is data, and inside a
    # ``with`` block, so neither the early return above nor an error while
    # dumping can leak the handle.
    tmp_path = '/tmp/out.json'
    with open(tmp_path, 'w') as out:
        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        try:
            # Explicit .next() calls are kept from the original; the loop
            # ends when the result set raises StopIteration.
            while True:
                r = resultset.next()
                r['_id'] = r._meta.id
                out.write(json.dumps(r) + '\n')
        except StopIteration:
            pass

    # gzip
    backup = os.path.join(backupdir, "topics.{}.json.gz".format(_type))

    with open(tmp_path, 'rb') as f_in:
        f_out = gzip.open(backup, 'wb')
        try:
            f_out.writelines(f_in)
        finally:
            # The original closed f_out twice and never closed f_in.
            f_out.close()
    sys.stderr.write("Created %s\n" % backup)