def test_nested_filter_2(self):
    """NestedFilter wrapping an ANDFilter of two term filters.

    Each (key_1, key_2) pair is run as its own search; the expected hit
    counts mirror the fixture data indexed by the test setup.
    """
    cases = (
        ((1, 3), 2),
        ((1, 4), 0),
        ((2, 4), 1),
    )
    for (value_1, value_2), expected in cases:
        combined = filters.ANDFilter([
            filters.TermFilter("key_1", value_1),
            filters.TermFilter("key_2", value_2),
        ])
        nested = filters.NestedFilter(path="nested_filter", filter=combined)
        result = self.conn.search(query=Search(filter=nested),
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
        self.assertEqual(expected, len(result.hits))
def test_QueryHighlight(self):
    """Search for "joe" with highlighting requested on two fields and
    check that highlight data is attached to the first hit.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0].meta.highlight, None)
def test_QueryHighlight(self):
    """Highlight query against the legacy "test-index"; only the raw
    hit total is checked.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    result = self.conn.search(q, indexes=["test-index"])
    # Leftover debug pprint removed; assertEquals is a deprecated
    # alias of assertEqual.
    self.assertEqual(result["hits"]["total"], 2)
def test_QueryHighlight(self):
    """Check the hit total and the exact highlighted fragment produced
    with the default <em> highlight tags.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # Leftover debug print removed; assertEquals is a deprecated
    # alias of assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0]._meta.highlight, None)
    self.assertEqual(resultset[0]._meta.highlight[u"parsedtext"][0],
                     u'<em>Joe</em> Testere nice guy ')
def test_QueryHighlightWithHighLighter(self):
    """Custom HighLighter tags (<b>...</b>) replace the default <em>
    markup in the highlighted fragment.
    """
    h = HighLighter(['<b>'], ['</b>'])
    q = Search(StringQuery("joe"), highlight=h)
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # Leftover debug print removed; assertEquals is a deprecated
    # alias of assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0].meta.highlight, None)
    self.assertEqual(resultset[0].meta.highlight[u"parsedtext"][0],
                     u'<b>Joe</b> Testere nice guy ')
def find_term(self, name, value, size=10, index=None):
    """Run a single term query against ``index`` (or the default index).

    Returns the search result set, or None when no connection is
    configured.
    """
    if not self.connection:
        return
    term_query = TermQuery(name, value)
    target_index = index or self.index
    return self.connection.search(query=Search(term_query, size=size),
                                  indices=target_index)
def search(searchkey=u"电影"):
    """Full-text search over the "zhihu"/"answer" index with highlighting.

    Returns the rendered results page with highlighted titles.
    """
    conn = ES('127.0.0.1:9200')
    # TextQuery analyzes (tokenizes) the search key.
    qtitle = TextQuery("title", searchkey)
    h = HighLighter(['<b>'], ['</b>'], fragment_size=500)
    # Multi-field search (must => AND, should => OR), highlighting,
    # paging via start/size, and sorting by id ascending.
    q = Search(BoolQuery(should=[qtitle]), highlight=h, start=0, size=3,
               sort={'id': {'order': 'asc'}})
    q.add_highlight("title")
    results = conn.search(q, "zhihu", "answer")
    hits = []  # renamed from "list" so the builtin is not shadowed
    for r in results:
        if "title" in r._meta.highlight:
            r['title'] = r._meta.highlight[u"title"][0]
        hits.append(r)
    return template('results.html', list=hits, count=results.total)
def test_type(self):
    """TypeFilter restricts results to one document type even when the
    search itself passes an empty doc_types list.
    """
    # Leftover debug prints (serialized query / raw hits) removed.
    f = filters.TypeFilter(type=self.document_type)
    q = Search(filter=f)
    result = self.conn.search(query=q, indices=self.index_name, doc_types=[])
    self.assertEqual(2, len(result.hits))
    self.assertItemsEqual([u'1', u'2'], [item._id for item in result.hits])

    f = filters.TypeFilter(type=self.internal_type)
    q = Search(filter=f)
    result = self.conn.search(query=q, indices=self.index_name, doc_types=[])
    self.assertEqual(1, len(result.hits))
    self.assertItemsEqual([u'4'], [item._id for item in result.hits])
def find(self, filter_terms, size=10, doc_types=None, index=None):
    """Search with a must-combined term query built from ``filter_terms``.

    Returns the search result set, or None when no connection is
    configured.
    """
    if not self.connection:
        return
    term_query = self._create_term_query(must_list=filter_terms)
    target_index = index or self.index
    return self.connection.search(query=Search(term_query, size=size),
                                  indices=target_index,
                                  doc_types=doc_types)
def test_exists(self):
    """ExistsFilter matches only the document that has the field set."""
    exists = filters.ExistsFilter(field='exists_filter')
    result = self.conn.search(query=Search(filter=exists),
                              indices=self.index_name,
                              doc_types=[self.document_type])
    self.assertEqual(1, len(result.hits))
    self.assertEqual(u'1', result.hits[0]['_id'])
def test_nested_filter_1(self):
    """NestedFilter with a single term filter on the nested path."""
    nested = filters.NestedFilter(path="nested_filter",
                                  filter=filters.TermFilter("key_1", 1))
    result = self.conn.search(query=Search(filter=nested),
                              indices=self.index_name,
                              doc_types=[self.document_type])
    self.assertEqual(2, len(result.hits))

    nested = filters.NestedFilter(path="nested_filter",
                                  filter=filters.TermFilter("key_1", 200))
    result = self.conn.search(query=Search(filter=nested),
                              indices=self.index_name,
                              doc_types=[self.document_type])
    self.assertEqual(1, len(result.hits))
    self.assertEqual(u'2', result.hits[0]['_id'])
def test_ids_filter_with_type(self):
    """Doc type set in filter and left blank in search """
    ids_filter = filters.IdsFilter(values=[1, 2, 4], type=self.internal_type)
    result = self.conn.search(query=Search(filter=ids_filter),
                              indices=self.index_name,
                              doc_types=[])
    self.assertEqual(1, len(result.hits))
    self.assertItemsEqual([u'4'], [item._id for item in result.hits])
def test_ids_direct_type(self):
    """Doc type set in search """
    ids_filter = filters.IdsFilter(values=[1, 2, 4])
    result = self.conn.search(query=Search(filter=ids_filter),
                              indices=self.index_name,
                              doc_types=[self.document_type])
    self.assertEqual(2, len(result.hits))
    self.assertItemsEqual([u'1', u'2'], [item._id for item in result.hits])
def search(searchkey=u"电影"):
    """Full-text search over the "zhihu"/"answer" index with highlighting.

    Returns the rendered results page with highlighted titles.
    """
    conn = ES('127.0.0.1:9200')
    # TextQuery analyzes (tokenizes) the search key.
    qtitle = TextQuery("title", searchkey)
    h = HighLighter(['<b>'], ['</b>'], fragment_size=500)
    # Multi-field search (must => AND, should => OR), highlighting,
    # paging via start/size, and sorting by id ascending.
    q = Search(BoolQuery(should=[qtitle]), highlight=h, start=0, size=3,
               sort={'id': {'order': 'asc'}})
    q.add_highlight("title")
    results = conn.search(q, "zhihu", "answer")
    hits = []  # renamed from "list" so the builtin is not shadowed
    for r in results:
        if "title" in r._meta.highlight:
            r['title'] = r._meta.highlight[u"title"][0]
        hits.append(r)
    return template('results.html', list=hits, count=results.total)
def test_filter(self):
    """TermsFilter driven by a terms-lookup document.

    The hit count depends on which lookup document supplies the terms.
    """
    for lookup_id, expected in ((1, 2), (2, 1)):
        lookup = utils.TermsLookup(index=self.index_name,
                                   type=self.document_type,
                                   id=lookup_id,
                                   path='terms_lookup_filter')
        terms = filters.TermsFilter('terms_filter', lookup)
        result = self.conn.search(query=Search(filter=terms),
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
        self.assertEqual(expected, len(result.hits))
def test_terms_filter_2(self):
    """With execution set"""
    # Each value list is run as its own search against the fixture data.
    for values, expected in (([24], 1), ([24, 25], 2), ([26], 0)):
        terms = filters.TermsFilter(values=values, field='terms_filter')
        result = self.conn.search(query=Search(filter=terms),
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
        self.assertEqual(expected, len(result.hits))
def test_QueryHighlight(self):
    """Highlight query against the legacy "test-index"; only the raw
    hit total is checked.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    result = self.conn.search(q, indexes=["test-index"])
    # Leftover debug pprint removed; assertEquals is a deprecated
    # alias of assertEqual.
    self.assertEqual(result['hits']['total'], 2)
def test_QueryHighlight(self):
    """Check the hit total and the exact highlighted fragment produced
    with the default <em> highlight tags.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # Leftover debug print removed; assertEquals is a deprecated
    # alias of assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0].meta.highlight, None)
    self.assertEqual(resultset[0].meta.highlight[u"parsedtext"][0],
                     u'<em>Joe</em> Testere nice guy ')
ftool = FileTools() ftrans = FormatTranslator() # 1. Create Connection conn = ES() # 2. Index Data dataset_json = open("../dataset.json") dataset = json.load(dataset_json)['data'] for data in dataset: conn.index(data, "example_index", "example_type", "example_id_"+str(dataset.index(data))) # 3. Create Simple Query query = MatchAllQuery() # 4. Create Simple Aggregation agg = TermsAgg('agg1', field="name",sub_aggs=[],size=100) # 5. Get Result search = Search(query,size=5) search.agg.add(agg) print search.serialize() result = conn.search(search, "example_index", "example_type" ) for i in result: print json.dumps(i,indent=2) print json.dumps(result.aggs,indent=2) result._do_search() print json.dumps(result._results,indent=2)
def test_QueryHighlight(self):
    """Highlight query against the legacy "test-index"; only the result
    total is checked.
    """
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indexes=["test-index"])
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(resultset.total, 2)
def search(self, qrange):
    """Run a search constrained by ``qrange`` wrapped in a RangeFilter."""
    range_filter = filters.RangeFilter(qrange)
    return self.conn.search(query=Search(filter=range_filter),
                            indices=self.index_name,
                            doc_types=[self.document_type])
import json

from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Build a two-level terms aggregation: client buckets, each broken down
# into domain sub-buckets. size=20000 keeps all buckets in one response.
domain_bucket = TermsAgg('domain_agg', field='json_data.etp_domain_id',
                         size=20000)
client_bucket = TermsAgg('client_agg', field='json_data.etp_client_id',
                         sub_aggs=[domain_bucket], size=20000)

# size=0: we only want aggregation results, no document hits.
aggregation_search = Search(query=MatchAllQuery(), size=0)
aggregation_search.agg.add(client_bucket)

print(json.dumps(aggregation_search.serialize(), indent=2))
ftrans = FormatTranslator() # 1. Create Connection conn = ES() # 2. Index Data dataset_json = open("../dataset.json") dataset = json.load(dataset_json)['data'] for data in dataset: conn.index(data, "example_index", "example_type", "example_id_" + str(dataset.index(data))) # 3. Create Simple Query query = MatchAllQuery() # 4. Create Simple Aggregation agg = TermsAgg('agg1', field="name", sub_aggs=[], size=100) # 5. Get Result search = Search(query, size=5) search.agg.add(agg) print search.serialize() result = conn.search(search, "example_index", "example_type") for i in result: print json.dumps(i, indent=2) print json.dumps(result.aggs, indent=2) result._do_search() print json.dumps(result._results, indent=2)