def test_terms_facet_filter(self):
    """Term facet on ``tag`` for a match-all query filtered to tag == 'foo'.

    Expects two hits and a single ``foo`` bucket with count 2, readable both
    by key and by attribute access on ``resultset.facets``. Also checks that
    identically built searches compare equal and that changing only the
    facet field makes them compare unequal.
    """

    def build_search(facet_field):
        # Match-all restricted by the tag filter, with one term facet attached.
        search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(query=build_search('tag'),
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    expected_terms = [{u'count': 2, u'term': u'foo'}]
    self.assertEqual(resultset.total, 2)
    self.assertEqual(resultset.facets['tag']['terms'], expected_terms)
    self.assertEqual(resultset.facets.tag.terms, expected_terms)

    # Equality of the search objects themselves, independent of the server.
    self.assertEqual(build_search('tag'), build_search('tag'))
    self.assertNotEqual(build_search('tag'), build_search('bag'))
def test_terms_facet_filter(self):
    """Term facet on ``tag`` for a match-all query filtered to tag == "foo".

    Expects two hits and a single ``foo`` bucket with count 2, readable both
    by key and by attribute access on ``resultset.facets``. Also checks that
    identically built searches compare equal and that changing only the
    facet field makes them compare unequal.
    """

    def build_search(facet_field):
        # Match-all restricted by the tag filter, with one term facet attached.
        search = FilteredQuery(MatchAllQuery(), TermFilter("tag", "foo")).search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(
        query=build_search("tag"),
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    expected_terms = [{u"count": 2, u"term": u"foo"}]
    self.assertEqual(resultset.total, 2)
    self.assertEqual(resultset.facets["tag"]["terms"], expected_terms)
    self.assertEqual(resultset.facets.tag.terms, expected_terms)

    # Equality of the search objects themselves, independent of the server.
    self.assertEqual(build_search("tag"), build_search("tag"))
    self.assertNotEqual(build_search("tag"), build_search("bag"))
def test_terms_facet(self):
    """Term facet on ``tag`` over an unfiltered match-all query.

    Expects three hits and the buckets foo (count 2) followed by bar
    (count 1). Also checks that identically built searches compare equal and
    that changing only the facet field makes them compare unequal.
    """

    def build_search(facet_field):
        # Plain match-all search carrying a single term facet.
        search = MatchAllQuery().search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(query=build_search('tag'),
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.facets.tag.terms,
                     [{u'count': 2, u'term': u'foo'},
                      {u'count': 1, u'term': u'bar'}])

    # Equality of the search objects themselves, independent of the server.
    self.assertEqual(build_search('tag'), build_search('tag'))
    self.assertNotEqual(build_search('tag'), build_search('bag'))
def test_terms_facet_filter(self):
    """Term facet on ``tag`` for a match-all query filtered to tag == 'foo'.

    Expects two hits and a single ``foo`` bucket with count 2, checked through
    both key access and attribute access on ``resultset.facets``. Search
    objects built the same way must compare equal; a different facet field
    must make them differ.
    """

    def make_search(facet_field):
        # One filtered match-all search with a single term facet.
        filtered = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
        search = filtered.search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(query=make_search('tag'),
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    foo_bucket = [{u'count': 2, u'term': u'foo'}]
    self.assertEqual(resultset.total, 2)
    self.assertEqual(resultset.facets['tag']['terms'], foo_bucket)
    self.assertEqual(resultset.facets.tag.terms, foo_bucket)

    # Comparison of the search objects only; no server round-trip involved.
    self.assertEqual(make_search('tag'), make_search('tag'))
    self.assertNotEqual(make_search('tag'), make_search('bag'))
def dump(start, end, backupdir, eshost):
    """Export documents from a date range into a gzipped JSON-lines backup.

    Runs a scan search over the ``habakkuk-*`` indices (doc type
    ``habakkuk``) for documents whose ``created_at_date`` lies between
    ``start`` and ``end`` (upper bound excluded), writes one JSON document
    per line to ``/tmp/out.json`` and then compresses that file to
    ``<backupdir>/habakkuk-<start as YYYY-MM-DD>.json.gz``.

    Args:
        start: lower bound of the range; also formatted with ``%Y-%m-%d``
            to name the backup file.
        end: upper bound of the range (``include_upper=False``).
        backupdir: directory that receives the ``.json.gz`` file.
        eshost: value handed to ``ES(...)`` to open the connection.

    Returns:
        None. Progress and the "no data" notice are written to stderr.
    """
    _type = 'habakkuk'
    tmp_path = '/tmp/out.json'
    conn = ES(eshost)

    # ``with`` closes the scratch file on every path, including the early
    # return below and any error raised while scanning.
    with open(tmp_path, 'w') as out:
        date_range = ESRange('created_at_date', start, end, include_upper=False)
        q = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=date_range)).search()
        resultset = conn.search(query=q, indices=_type + "-*",
                                doc_types=[_type], scan=True)
        if not resultset.total:
            sys.stderr.write("no data for %s - %s\n" % (start, end))
            return

        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        try:
            while True:
                out.write(json.dumps(resultset.next()) + '\n')
        except StopIteration:
            # next() raising StopIteration marks the end of the scan.
            pass

    # gzip the scratch file; both handles are closed even if the copy fails.
    ext = datetime.strftime(start, '%Y-%m-%d')
    backup = os.path.join(backupdir, "habakkuk-%s.json.gz" % ext)
    f_in = open(tmp_path, 'rb')
    try:
        f_out = gzip.open(backup, 'wb')
        try:
            f_out.writelines(f_in)
        finally:
            f_out.close()
    finally:
        f_in.close()
    sys.stderr.write("Created %s\n" % backup)
def term_facet(host='localhost:9200', terms=['bibleverse'], _type='habakkuk', date_filter=[], size=10): ret = [] conn = ES(host) q = MatchAllQuery() if date_filter: start,end = date_filter q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False))) q = q.search(size=0) for term in terms: q.facet.add_term_facet(term,order='count',size=size) print json.dumps(json.loads(q.to_search_json()),indent=2) resultset = conn.search(query=q, indices=_type+'-*', doc_types=[_type]) for facet in resultset.facets: print "Total",facet,resultset.facets[facet]['total'] for row in resultset.facets[facet]['terms']: print "\t",row['term'],row['count'] ret.append((facet,row['term'])) return ret
def facets(host='localhost:9200',
           facet_terms=['bibleverse'],
           _type='habakkuk',
           date_filter=[],
           size=10):
    """Return term-facet buckets keyed by facet name.

    Builds a zero-hit search over the ``<_type>-*`` indices with one term
    facet per entry in ``facet_terms`` (ordered by count, limited to ``size``
    buckets). When ``date_filter`` is non-empty it is unpacked as
    ``(start, end)``; each bound's ``isoformat()`` is used in a range filter
    on ``created_at_date`` with the upper bound excluded.

    The list defaults are only read here (iterated / unpacked), never mutated.

    Returns:
        A dict mapping each facet name to a list of
        ``{"value": <term>, "count": <count>}`` dicts.
    """
    buckets_by_facet = {}
    conn = ES(host)

    query = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        date_range = ESRange('created_at_date',
                             start.isoformat(),
                             end.isoformat(),
                             include_upper=False)
        query = FilteredQuery(query, RangeFilter(qrange=date_range))

    search = query.search(size=0)
    for term in facet_terms:
        search.facet.add_term_facet(term, order='count', size=size)
    es_logger.info(search.serialize())

    resultset = conn.search(query=search, indices=_type+'-*', doc_types=[_type])
    for facet in resultset.facets:
        buckets_by_facet[facet] = [
            {"value": row['term'], "count": row['count']}
            for row in resultset.facets[facet]['terms']
        ]

    logger.debug("facets return|'%s'"%json.dumps(buckets_by_facet))
    return buckets_by_facet
def test_terms_facet(self):
    """Term facet on ``tag`` over a match-all query on test-index/test-type.

    Expects three hits and the buckets foo (count 2) then bar (count 1),
    read from the result with dict-style access.
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('tag')
    result = self.conn.search(query=search,
                              indexes=["test-index"],
                              doc_types=["test-type"])
    expected_terms = [{u'count': 2, u'term': u'foo'},
                      {u'count': 1, u'term': u'bar'}]
    self.assertEqual(result['hits']['total'], 3)
    self.assertEqual(result['facets']['tag']['terms'], expected_terms)
def test_terms_facet_filter(self):
    """Term facet on ``tag`` for a match-all query filtered to tag == 'foo'.

    Runs against test-index/test-type and expects two hits with a single
    ``foo`` bucket of count 2.
    """
    search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
    search.facet.add_term_facet('tag')
    resultset = self.conn.search(query=search,
                                 indices=["test-index"],
                                 doc_types=["test-type"])
    self.assertEqual(resultset.total, 2)
    self.assertEqual(resultset.facets['tag']['terms'],
                     [{u'count': 2, u'term': u'foo'}])
def test_nested_agg(self):
    """Nested aggregation on path ``resellers``: 3 hits, nested doc_count 4."""
    search = MatchAllQuery().search()
    search.agg.add(NestedAgg(name="nested", path="resellers"))
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    expected = {u"doc_count": 4}
    self.assertEqual(resultset.aggs.nested, expected)
def test_nested_agg(self):
    """Nested aggregation on path ``resellers``: 3 hits, nested doc_count 4."""
    search = MatchAllQuery().search()
    search.agg.add(NestedAgg(name='nested', path='resellers'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    expected = {u'doc_count': 4}
    self.assertEqual(resultset.aggs.nested, expected)
def test_terms_facet(self):
    """Term facet on ``tag`` over an unfiltered match-all query.

    Expects three hits and the buckets foo (count 2) followed by bar (count 1).
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('tag')
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    expected_terms = [{u'count': 2, u'term': u'foo'},
                      {u'count': 1, u'term': u'bar'}]
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.facets.tag.terms, expected_terms)
def test_max_agg(self):
    """Max aggregation over field ``position``: 3 hits, reported value 3."""
    search = MatchAllQuery().search()
    search.agg.add(MaxAgg(name="max", field="position"))
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    expected = {u"value": 3}
    self.assertEqual(resultset.aggs.max, expected)
def test_missing_agg(self):
    """Missing aggregation over field ``integer``: 3 hits, doc_count 1."""
    search = MatchAllQuery().search()
    search.agg.add(MissingAgg(name='missing', field='integer'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    expected = {u'doc_count': 1}
    self.assertEqual(resultset.aggs.missing, expected)
def test_max_agg(self):
    """Max aggregation over field ``position``: 3 hits, reported value 3."""
    search = MatchAllQuery().search()
    search.agg.add(MaxAgg(name='max', field='position'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    expected = {u'value': 3}
    self.assertEqual(resultset.aggs.max, expected)
def test_missing_agg(self):
    """Missing aggregation over field ``integer``: 3 hits, doc_count 1."""
    search = MatchAllQuery().search()
    search.agg.add(MissingAgg(name="missing", field="integer"))
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    expected = {u"doc_count": 1}
    self.assertEqual(resultset.aggs.missing, expected)
def test_min_agg(self):
    """Min aggregation over field ``position``: 3 hits, reported value 1."""
    search = MatchAllQuery().search()
    min_agg = MinAgg(name="min", field="position")
    search.agg.add(min_agg)
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    expected = {u'value': 1}
    self.assertEqual(resultset.aggs.min, expected)
def get_top_authors():
    """Return one author per bucket of the ``author`` term facet.

    Runs a match-all search with a term facet on ``author`` against
    ``ELASTICSEARCH_INDEX`` (doc type ``post``) and resolves each bucket's
    term through ``get_author(id=...)``, preserving the bucket order.
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('author')

    connection = get_connection()
    facet_data = connection.search(search, ELASTICSEARCH_INDEX, 'post').facets
    return [get_author(id=bucket['term'])
            for bucket in facet_data['author']['terms']]
def test_terms_facet(self):
    """Term facet on ``tag`` over an unfiltered match-all query.

    Expects three hits and the buckets foo (count 2) followed by bar
    (count 1). Also checks that identically built searches compare equal and
    that changing only the facet field makes them compare unequal.
    """

    def build_search(facet_field):
        # Plain match-all search carrying a single term facet.
        search = MatchAllQuery().search()
        search.facet.add_term_facet(facet_field)
        return search

    resultset = self.conn.search(
        query=build_search("tag"),
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    self.assertEqual(
        resultset.facets.tag.terms,
        [{u"count": 2, u"term": u"foo"}, {u"count": 1, u"term": u"bar"}],
    )

    # Equality of the search objects themselves, independent of the server.
    self.assertEqual(build_search("tag"), build_search("tag"))
    self.assertNotEqual(build_search("tag"), build_search("bag"))
def test_reverse_nested_agg(self):
    """Reverse-nested aggregation placed under a nested aggregation.

    The nested aggregation (path ``resellers``) should report doc_count 4 and
    its ``reverse`` sub-aggregation doc_count 2, over 3 hits in total.
    """
    search = MatchAllQuery().search()
    search.agg.add(
        NestedAgg(name='nested',
                  path='resellers',
                  sub_aggs=[ReverseNestedAgg(name='reverse', field='id')]))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.aggs.nested['doc_count'], 4)
    expected_reverse = {u'doc_count': 2}
    self.assertEqual(resultset.aggs.nested.reverse, expected_reverse)
def test_reverse_nested_agg(self):
    """Reverse-nested aggregation placed under a nested aggregation.

    The nested aggregation (path ``resellers``) should report doc_count 4 and
    its ``reverse`` sub-aggregation doc_count 2, over 3 hits in total.
    """
    search = MatchAllQuery().search()
    search.agg.add(
        NestedAgg(
            name="nested",
            path="resellers",
            sub_aggs=[ReverseNestedAgg(name="reverse", field="id")],
        )
    )
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.aggs.nested["doc_count"], 4)
    expected_reverse = {u"doc_count": 2}
    self.assertEqual(resultset.aggs.nested.reverse, expected_reverse)
def test_date_facet_filter(self):
    """Monthly date histogram restricted to 2011-04-01 .. 2011-05-01.

    The range filter on ``date`` excludes its upper bound, so the test
    expects two hits and a single histogram entry with count 2 at
    1301616000000.
    """
    april_only = ESRange('date',
                         datetime.date(2011, 4, 1),
                         datetime.date(2011, 5, 1),
                         include_upper=False)
    search = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april_only)).search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 2)
    self.assertEqual(resultset.facets['date_facet']['entries'],
                     [{u'count': 2, u'time': 1301616000000}])
def test_terms_facet_filter(self):
    """Term facet on ``tag`` for a match-all query filtered to tag == 'foo'.

    Runs against test-index/test-type and expects two hits with a single
    ``foo`` bucket of count 2, read from the result with dict-style access.
    """
    search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
    search.facet.add_term_facet('tag')
    result = self.conn.search(query=search,
                              indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEqual(result['hits']['total'], 2)
    self.assertEqual(result['facets']['tag']['terms'],
                     [{u'count': 2, u'term': u'foo'}])
def test_date_facet_filter(self):
    """Monthly date histogram restricted to 2011-04-01 .. 2011-05-01.

    The range filter on ``date`` excludes its upper bound, so the test
    expects two hits and a single histogram entry with count 2 at
    1301616000000.
    """
    april_only = ESRange(
        "date",
        datetime.date(2011, 4, 1),
        datetime.date(2011, 5, 1),
        include_upper=False,
    )
    search = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=april_only)).search()
    search.facet.facets.append(
        DateHistogramFacet("date_facet", field="date", interval="month")
    )
    resultset = self.conn.search(
        query=search,
        indices=self.index_name,
        doc_types=[self.document_type],
    )
    self.assertEqual(resultset.total, 2)
    self.assertEqual(
        resultset.facets["date_facet"]["entries"],
        [{u"count": 2, u"time": 1301616000000}],
    )
def test_date_facet(self):
    """Monthly date histogram on ``date`` over a match-all query.

    Runs against test-index/test-type and expects three hits split into two
    entries: count 2 at 1301616000000 and count 1 at 1304208000000. The final
    two assertions confirm that those millisecond timestamps are
    2011-04-01 and 2011-05-01.
    """
    search = MatchAllQuery().search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    result = self.conn.search(query=search,
                              indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEqual(result['hits']['total'], 3)
    self.assertEqual(result['facets']['date_facet']['entries'],
                     [{u'count': 2, u'time': 1301616000000},
                      {u'count': 1, u'time': 1304208000000}])

    # Both timestamps are midnight UTC; decoding them in local time would
    # give the previous day on machines west of UTC, so decode as UTC.
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram on ``date`` over a match-all query.

    Expects three hits split into two entries: count 2 at 1301616000000 and
    count 1 at 1304208000000. The final two assertions confirm that those
    millisecond timestamps, decoded as UTC, are 2011-04-01 and 2011-05-01.
    """
    search = MatchAllQuery().search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.facets.date_facet.entries,
                     [{u'count': 2, u'time': 1301616000000},
                      {u'count': 1, u'time': 1304208000000}])

    # Plain decimal literals (4, 1, 5): leading-zero forms such as 04 are
    # octal in Python 2 and a syntax error in Python 3.
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram on ``date`` over a match-all query.

    Expects three hits split into two entries: count 2 at 1301616000000 and
    count 1 at 1304208000000. The final two assertions confirm that those
    millisecond timestamps are 2011-04-01 and 2011-05-01.
    """
    search = MatchAllQuery().search()
    search.facet.facets.append(
        DateHistogramFacet('date_facet', field='date', interval='month'))
    resultset = self.conn.search(query=search,
                                 indices=self.index_name,
                                 doc_types=[self.document_type])
    self.assertEqual(resultset.total, 3)
    self.assertEqual(resultset.facets.date_facet.entries,
                     [{u'count': 2, u'time': 1301616000000},
                      {u'count': 1, u'time': 1304208000000}])

    # Both timestamps are midnight UTC; decoding them in local time would
    # give the previous day on machines west of UTC, so decode as UTC.
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def test_date_facet(self):
    """Monthly date histogram on ``date`` over a match-all query.

    Runs against test-index/test-type and expects three hits split into two
    entries: count 2 at 1301616000000 and count 1 at 1304208000000. The final
    two assertions confirm that those millisecond timestamps are
    2011-04-01 and 2011-05-01.
    """
    search = MatchAllQuery().search()
    histogram = DateHistogramFacet('date_facet', field='date', interval='month')
    search.facet.facets.append(histogram)
    result = self.conn.search(query=search,
                              indexes=["test-index"],
                              doc_types=["test-type"])
    self.assertEqual(result['hits']['total'], 3)
    expected_entries = [
        {u'count': 2, u'time': 1301616000000},
        {u'count': 1, u'time': 1304208000000},
    ]
    self.assertEqual(result['facets']['date_facet']['entries'], expected_entries)

    # Both timestamps are midnight UTC; decoding them in local time would
    # give the previous day on machines west of UTC, so decode as UTC.
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
        datetime.date(2011, 4, 1))
    self.assertEqual(
        datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
        datetime.date(2011, 5, 1))
def dump_topics(backupdir, eshost, _type, indices="topics-all"):
    """Export every document of one doc type into a gzipped JSON-lines backup.

    Runs a match-all scan search over ``indices`` for doc type ``_type``,
    copies each hit's ``_meta.id`` into its ``_id`` key, writes one JSON
    document per line to ``/tmp/out.json`` and then compresses that file to
    ``<backupdir>/topics.<_type>.json.gz``.

    Args:
        backupdir: directory that receives the ``.json.gz`` file.
        eshost: value handed to ``ES(...)`` to open the connection.
        _type: doc type to export; also part of the backup file name.
        indices: index (or indices) to search; defaults to ``"topics-all"``.

    Returns:
        None. Progress and the "no data" notice are written to stderr.
    """
    tmp_path = '/tmp/out.json'
    conn = ES(eshost)

    # ``with`` closes the scratch file on every path, including the early
    # return below and any error raised while scanning.
    with open(tmp_path, 'w') as out:
        q = MatchAllQuery().search()
        resultset = conn.search(query=q, indices=indices,
                                doc_types=[_type], scan=True)
        if not resultset.total:
            sys.stderr.write("no data\n")
            return

        sys.stderr.write("Will write %d lines to %s\n" % (resultset.total, out.name))
        try:
            while True:
                r = resultset.next()
                # Carry the document id inside the exported record.
                r['_id'] = r._meta.id
                out.write(json.dumps(r) + '\n')
        except StopIteration:
            # next() raising StopIteration marks the end of the scan.
            pass

    # gzip the scratch file; both handles are closed even if the copy fails.
    backup = os.path.join(backupdir, "topics.{}.json.gz".format(_type))
    f_in = open(tmp_path, 'rb')
    try:
        f_out = gzip.open(backup, 'wb')
        try:
            f_out.writelines(f_in)
        finally:
            f_out.close()
    finally:
        f_in.close()
    sys.stderr.write("Created %s\n" % backup)