def test_GeoDistanceFilter(self):
    gq = GeoDistanceFilter("pin.location", {"lat": 40, "lon": 70}, "200km")
    q = FilteredQuery(MatchAllQuery(), gq)
    resultset = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)

    gq = GeoDistanceFilter("pin.location", [70, 40], "200km")
    q = FilteredQuery(MatchAllQuery(), gq)
    resultset = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)
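# The geo filter tests in this collection (GeoDistanceFilter, GeoBoundingBoxFilter,
# GeoPolygonFilter) assume an index whose "pin.location" field is mapped as a
# geo_point and which holds a single matching document. A rough sketch of that
# fixture, assuming pyes-style setup calls; index/type names, coordinates and
# the exact setup API vary across pyes versions and are illustrative only:
def setUp(self):
    self.conn = ES("127.0.0.1:9200")
    self.conn.create_index("test-mindex")
    # map pin.location as a geo_point so the geo filters can run against it
    mapping = {"pin": {"properties": {"location": {"type": "geo_point"}}}}
    self.conn.put_mapping("test-type", {"properties": mapping}, ["test-mindex"])
    self.conn.index({"pin": {"location": {"lat": 40.12, "lon": 71.34}}},
                    "test-mindex", "test-type", 1)
    self.conn.refresh(["test-mindex"])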
def _get_results(self):
    """
    @returns: elasticsearch iterator over results defined by self.query
    """
    query = self.db_query
    if self.db_query.is_empty():
        query = MatchAllQuery()
    if self._ordering:
        query.sort = self._ordering
    #print "query", self.query.tables, query
    return self._connection.search(query,
                                   indices=[self.connection.db_name],
                                   doc_types=self.query.model._meta.db_table)
def _get_ids_worker(args):
    from utils.es import ESIndexer
    from pyes import MatchAllQuery
    es_kwargs, start, step = args
    q = MatchAllQuery().search()
    q.sort = [{'entrezgene': 'asc'}, {'ensembl.gene': 'asc'}]
    q.fields = []
    q.start = start
    q.size = step
    esi = ESIndexer(**es_kwargs)
    cnt = esi.count()['count']
    res = esi.conn.search_raw(q)
    assert res['hits']['total'] == cnt
    return [doc['_id'] for doc in res['hits']['hits']]
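# _get_ids_worker is shaped to be mapped over (es_kwargs, start, step) tuples,
# e.g. from a multiprocessing pool. A minimal sketch of that usage; the helper
# name, pool size and step value below are illustrative assumptions, not part
# of the original code:
from multiprocessing import Pool

def get_all_ids(es_kwargs, total, step=10000):
    # one (es_kwargs, start, step) tuple per slice of the sorted result set
    args = [(es_kwargs, start, step) for start in range(0, total, step)]
    pool = Pool(processes=4)
    try:
        chunks = pool.map(_get_ids_worker, args)
    finally:
        pool.close()
        pool.join()
    # flatten the per-slice id lists into one list
    return [_id for chunk in chunks for _id in chunk]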
def test_remove(self):
    """Ensure we can properly delete from ElasticSearch via DocManager.
    """
    docc = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
    self.elastic_doc.upsert(docc)
    self.elastic_doc.commit()
    res = self.elastic_conn.search(MatchAllQuery())
    self.assertTrue(len(res) == 1)

    self.elastic_doc.remove(docc)
    self.elastic_doc.commit()
    res = self.elastic_conn.search(MatchAllQuery())
    self.assertTrue(len(res) == 0)
def _get_results(self):
    """
    @returns: elasticsearch iterator over results defined by self.query
    """
    query = self.db_query
    if self.db_query.is_empty():
        query = MatchAllQuery()
    if self._ordering:
        query.sort = self._ordering
    #print "query", self.query.tables, query
    return self._connection.search(
        query,
        indices=[self.connection.db_name],
        doc_types=self.query.model._meta.db_table)
def test_remove(self):
    """Ensure we can properly delete from ElasticSearch via DocManager.
    """
    docc = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
    ElasticDoc.upsert(docc)
    ElasticDoc.commit()
    res = elastic.search(MatchAllQuery())
    self.assertTrue(len(res) == 1)

    ElasticDoc.remove(docc)
    ElasticDoc.commit()
    res = elastic.search(MatchAllQuery())
    self.assertTrue(len(res) == 0)
    print("PASSED REMOVE")
def get_last_doc(self):
    """Returns the last document stored in the Elastic engine.
    """
    result = self.elastic.search(MatchAllQuery(), size=1, sort='_ts:desc')
    for item in result:
        return item
def __call__(self, dquery):
    filters = []
    catalog = self.catalogtool._catalog
    idxs = catalog.indexes.keys()
    query = MatchAllQuery()
    for key, value in dquery.items():
        if key not in idxs:
            continue
        index = getIndex(catalog, key)
        if index is None:
            continue
        qq = index.get_query(key, value)
        if qq is None:
            continue
        if type(qq) == tuple:
            qq, is_query = qq
        else:
            is_query = False
        if is_query:
            query = qq
        else:
            filters.append(qq)
    if len(filters) == 0:
        return query
    else:
        return FilteredQuery(query, ANDFilter(filters))
def count(self, limit=None):
    query = self.db_query
    if self.db_query.is_empty():
        query = MatchAllQuery()
    res = self._connection.count(query,
                                 doc_types=self.query.model._meta.db_table)
    return res["count"]
def test_upsert(self):
    """Ensure we can properly insert into ElasticSearch via DocManager.
    """
    docc = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
    ElasticDoc.upsert(docc)
    ElasticDoc.commit()
    res = elastic.search(MatchAllQuery())
    for doc in res:
        self.assertTrue(doc['_id'] == '1' and doc['name'] == 'John')

    docc = {'_id': '1', 'name': 'Paul', 'ns': 'test.test'}
    ElasticDoc.upsert(docc)
    ElasticDoc.commit()
    res = elastic.search(MatchAllQuery())
    for doc in res:
        self.assertTrue(doc['_id'] == '1' and doc['name'] == 'Paul')
    print("PASSED UPSERT")
def test_upsert(self):
    """Ensure we can properly insert into ElasticSearch via DocManager.
    """
    docc = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
    self.elastic_doc.upsert(docc)
    self.elastic_doc.commit()
    res = self.elastic_conn.search(MatchAllQuery())
    for doc in res:
        self.assertTrue(doc['_id'] == '1' and doc['name'] == 'John')
def test_GeoBoundingBoxFilter(self):
    gq = GeoBoundingBoxFilter("pin.location",
                              location_tl={"lat": 40.717, "lon": 70.99},
                              location_br={"lat": 40.03, "lon": 72.0})
    q = FilteredQuery(MatchAllQuery(), gq)
    resultset = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)

    gq = GeoBoundingBoxFilter("pin.location", [70.99, 40.717], [74.1, 40.03])
    q = FilteredQuery(MatchAllQuery(), gq)
    result2 = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(result2.total, 1)
def test_ReconvertDoubles(self):
    """Regression test for issue #6. pyes used to fail when getting a query
    response in which a document contained a list of doubles.
    """
    q = MatchAllQuery()
    result = self.conn.search(query=q, indexes=["test-pindex"])
    self.assertEquals(result['hits']['total'], 2)
def test_GeoPolygonFilter(self):
    gq = GeoPolygonFilter("pin.location", [{"lat": 50, "lon": -30},
                                           {"lat": 30, "lon": -80},
                                           {"lat": 80, "lon": -90}])
    q = FilteredQuery(MatchAllQuery(), gq)
    resultset = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)

    gq = GeoPolygonFilter("pin.location", [[-30, 50], [-80, 30], [-90, 80]])
    q = FilteredQuery(MatchAllQuery(), gq)
    resultset = self.conn.search(query=q, indices=["test-mindex"])
    self.assertEquals(resultset.total, 1)
def get_last_doc(self):
    """Returns the last document stored in the Elastic engine.
    """
    last = None
    q = MatchAllQuery()
    # sort must be a string such as 'field:desc', not a set literal
    result = self.elastic.search(q, size=1, sort='_ts:desc')
    for doc in result:
        last = doc
        break
    return last
def test_full_search(self):
    """Query ElasticSearch for all docs via API and via DocManager's
    _search(), compare.
    """
    docc = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
    self.elastic_doc.upsert(docc)
    docc = {'_id': '2', 'name': 'Paul', 'ns': 'test.test'}
    self.elastic_doc.upsert(docc)
    self.elastic_doc.commit()

    search = self.elastic_doc._search()
    search2 = self.elastic_conn.search(MatchAllQuery())
    self.assertTrue(len(search) == len(search2))
    self.assertTrue(len(search) != 0)
    for i in range(0, len(search)):
        self.assertTrue(list(search)[i] == list(search2)[i])
def afterCompletion(self, transaction):
    tdata = get()
    if not tdata.registered:
        return

    es = tdata.es
    if es.mode == DISABLE_MODE:
        tdata.reset()
        return

    success = transaction.status == Status.COMMITTED
    query = FilteredQuery(MatchAllQuery(),
                          TermFilter('transaction_id', tdata.tid))
    conn = es.conn
    # NEED to refresh here otherwise we'll have inconsistencies
    conn.refresh()
    try:
        docs = conn.search(query, es.catalogsid, es.trns_catalogtype,
                           sort='order:desc')
        docs.count()  # force executing
    except ElasticSearchException:
        # XXX uh oh, nasty, we have a problem. Let's log it.
        warn("Error trying to abort transaction: %s" % (
            traceback.format_exc()))
        tdata.reset()
        return

    for doc in docs:
        conn.delete(es.catalogsid, es.trns_catalogtype, doc.get_id())
        if not success:
            if doc.action == Actions.add:
                # if it was an add action, remove the added doc
                conn.delete(es.catalogsid, es.catalogtype, doc.uid)
            elif doc.action in (Actions.modify, Actions.delete):
                # if it was a modify or delete, restore the doc
                restored_doc = loads(doc.data)
                conn.index(restored_doc, es.catalogsid, es.catalogtype,
                           doc.uid)

    # NEED to refresh here otherwise we'll have inconsistencies
    conn.refresh()
    tdata.reset()
def _get_results(self, index=0, count=None):
    """
    @returns: elasticsearch iterator over results defined by self.query
    """
    query = self.db_query
    if self.db_query.is_empty():
        query = MatchAllQuery()
    if self._ordering:
        query.sort = self._ordering
    query = Search(query)
    query.bulk_read = 5000
    if index > 0 and count is not None:
        query.start = index
        query.size = count
    # else:
    #     query = Search(query, start=0, size=5000)
    #print "query", self.query.tables, query
    return self._connection.search(query,
                                   indices=[self.connection.db_name],
                                   doc_types=self.query.model._meta.db_table)
def query(self, q=None, fields=None, start=0, size=10, sort=None,
          only_in=None, h=['_all'], facets=None, returnquery=False,
          explain=False, filter_by=None, custom_filter=None):
    '''Perform a query on ES and return a SearchResult object.

    @param q: if q is a string, it will be wrapped as a StringQuery;
              otherwise, q must be a pre-built Query instance.
    @param fields: a list of fields to be returned in the query result.
                   To return all fields, use ['_source'].
    @param start: hits to skip, for pagination.
    @param size: # of hits to be returned, for pagination.
    @param sort: fields used to sort returned hits, e.g.,
                     ['species', 'symbol']
                     ['-_id']    # descending on _id field
    @param only_in: a list of "index_types" to search against (any types
                    not in self.ES_AVAILABLE_TYPES will be ignored); or a
                    string naming one specific index_type; or, if empty
                    (None or []), all available index_types are searched.
    @param h: fields for highlighting.
    @param facets: fields for faceting; uses default facets if None.
    @param returnquery: if True, return the query JSON string for debugging.
    @param explain: if True, enables explanation for each hit on how its
                    score was computed.
    @param filter_by: a dictionary of {<field>: <value>} or a list of
                      (<field>, <value>) tuples, e.g.,
                          {'tag': 'chart', 'species': 'human'}
                          [('tag', 'chart'), ('species', ['human', 'mouse'])]
                      Note that <value> can be a list for multiple values.
    @param custom_filter: if provided, apply this filter instead.
    '''
    # Parse out the possible types to search across
    # doc_types = []
    # if only_in:
    #     if isinstance(only_in, basestring):
    #         only_in = [only_in]
    #     doc_types = list(set(only_in) & set(self.ES_AVAILABLE_TYPES))
    # doc_types = doc_types or self.ES_AVAILABLE_TYPES.keys()
    if only_in:
        if isinstance(only_in, basestring):
            only_in = [only_in]
        doc_types = only_in
    else:
        doc_types = self.ES_AVAILABLE_TYPES.keys()

    # Initialize q if it was not specified
    if not q:
        q = MatchAllQuery()

    # Set up q as a Query object if it was passed in as a string
    if type(q) in types.StringTypes:
        # Check for max query length
        if len(q) > self.ES_MAX_QUERY_LENGTH:
            return BiogpsSearchResult({'error': 'Query string too long.'})
        q = StringQuery(q, default_operator='AND')

    # Apply custom_filter if provided
    if custom_filter:
        q = FilteredQuery(q, custom_filter)
    # Otherwise, call the default filter build chain
    else:
        filter = self._build_filter(doc_types, filter_by)
        if filter:
            q = FilteredQuery(q, filter)

    q = q.search(fields=fields, start=start, size=size, sort=sort,
                 explain=explain)  # , index_boost={'gene': 1})

    # Add highlighting
    for _h in h:
        q.add_highlight(_h, fragment_size=300, number_of_fragments=0)

    # Add faceting
    _facets = facets or self._get_default_facets(doc_types)
    if _facets:
        for _f in _facets:
            q.facet.add_term_facet(_f)

    # Only for debugging
    if returnquery:
        return json.dumps(q.q, indent=2)

    # Run the final query and return the results
    return self._query(q, doc_types)
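# A minimal usage sketch of the query() method above, assuming a hypothetical
# instance `es` of the class it belongs to; the query string, type name and
# filter values are illustrative, not taken from the original code:
res = es.query(q='cdk2', only_in='gene',
               filter_by={'species': ['human', 'mouse']},
               start=0, size=10, sort=['-_id'])

# returnquery=True skips execution and returns the serialized query JSON,
# which is handy for inspecting the filter chain while debugging:
print es.query(q='cdk2', returnquery=True)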
def get_pubs(filter=MissingFilter('types')):
    q = FilteredQuery(MatchAllQuery(), filter)
    pubs = conn.search(query=q, indices=e_index, doc_types="immo")
    return pubs
def _search(self):
    """For test purposes only. Performs search on Elastic with empty query.
    Does not have to be implemented.
    """
    results = self.elastic.search(MatchAllQuery())
    return results
def getAllElasticsTransactions(self):
    return self.es.conn.search(MatchAllQuery(), self.es.catalogsid,
                               self.es.trns_catalogtype)
# test
from mediaresearchapp.tasks import MediaAggregateSQLTask

if __name__ == '__main__':
    es = ES("127.0.0.1:9200", default_indices='mediaaggregate')

    # Filters
    filters = [GeoDistanceFilter('location', [40.0, 9.00], 20, 'arc', 'km')]
    # filters = [TermFilter('message', 'elastic'),
    #            GeoDistanceFilter('locations',
    #                              {"lat": 40.0, "lon": 9.00},
    #                              20, 'arc', 'km')
    #            ]
    filter = ANDFilter(filters)
    q = FilteredQuery(MatchAllQuery(), filter)
    results = es.search(q)
    for r in results:
        print r
        break

    q4 = RegexTermQuery('city', 'bang.*')
    print q4
    resultset = es.search(q4)
    for r in resultset:
        print r

    query_str = {
        "query": {
            "termquery": [{
                "fieldname1": "value"
ftool = FileTools()
ftrans = FormatTranslator()

# 1. Create Connection
conn = ES()

# 2. Index Data
dataset_json = open("../dataset.json")
dataset = json.load(dataset_json)['data']
for data in dataset:
    conn.index(data, "example_index", "example_type",
               "example_id_" + str(dataset.index(data)))

# 3. Create Simple Query
query = MatchAllQuery()

# 4. Create Simple Aggregation
agg = TermsAgg('agg1', field="name", sub_aggs=[], size=100)

# 5. Get Result
search = Search(query, size=5)
search.agg.add(agg)
print search.serialize()

result = conn.search(search, "example_index", "example_type")
for i in result:
    print json.dumps(i, indent=2)
print json.dumps(result.aggs, indent=2)
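# The terms aggregation defined above comes back under result.aggs keyed by its
# name ('agg1'), with each bucket carrying a key and a doc_count, following the
# standard Elasticsearch terms-aggregation response shape. A small sketch of
# reading it (assuming result.aggs exposes that raw dict structure):
for bucket in result.aggs['agg1']['buckets']:
    print bucket['key'], bucket['doc_count']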