def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'): # try to connect with ES and delete the index es = ElasticSearch('http://localhost:9200/') ## uncomment the following code to prompt check # print "Will delete all the doc in the [index:type] from ElasticSearch:" # print index_name, ":", doc_type # confirm = raw_input("Sure?(y/n)") # if confirm!="y": # sys.exit(0) try: create_es() except Exception as e: print "Error", e else: print index_name, ":", doc_type, " deleted!" # initializing the documents documents = [] for record in self.userGeos: doc = { 'uid': int(record[0]), 'location': { 'lat': record[1], 'lon': record[2] } } documents.append(doc) print "Bulk indexing", len(documents), "documents.." es.bulk_index(index_name, doc_type, documents, id_field='uid') es.refresh(index_name) # test usage print "results from ES," query = {"from": 0, "size": 2000, 'query': {"match_all": {}}} res = es.search(query, index=index_name) print len(res['hits']['hits']), "documents found" print "sample result" print res['hits']['hits'][0]
def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'): # try to connect with ES and delete the index es = ElasticSearch('http://localhost:9200/') ## uncomment the following code to prompt check # print "Will delete all the doc in the [index:type] from ElasticSearch:" # print index_name, ":", doc_type # confirm = raw_input("Sure?(y/n)") # if confirm!="y": # sys.exit(0) try: create_es() except Exception as e: print "Error", e else: print index_name,":", doc_type," deleted!" # initializing the documents documents = [] for record in self.userGeos: doc = {'uid':int(record[0]), 'location':{'lat':record[1],'lon':record[2]}} documents.append(doc) print "Bulk indexing", len(documents),"documents.." es.bulk_index(index_name, doc_type, documents, id_field='uid') es.refresh(index_name) # test usage print "results from ES," query = { "from" : 0, "size" : 2000, 'query': { "match_all" : { } } } res = es.search(query, index=index_name) print len(res['hits']['hits']), "documents found" print "sample result" print res['hits']['hits'][0]
"order": "asc", "unit": "km" } }] } query1 = { "from": 0, "size": 10, 'query': { "match_all": {} }, "sort": [{ "likes": { "order": "desc" } }, "_score"], } query_count = {"facets": {"count_by_type": {"terms": {"field": "_type"}}}} # res = es.search(query, index='photo_geos',doc_type=['photos']) res = es.search(query_count, index='geos', doc_type=['user_geos']) print res sys.exit(0) uids = [ (r['_id'], r['sort'][0], r['_source']['views'], r['_source']['likes'], r['_source']['location']['lat'], r['_source']['location']['lon']) for r in res['hits']['hits'] ] print len(uids) for i in range(len(uids)): print uids[i]
class Collation:
    """Collate Elasticsearch documents per doctype.

    For each supported doctype, ``datadict`` pairs a Query (builds the ES
    lookup) with an "action" table mapping every ``_source`` field to the
    Aggregator that folds its values together.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        # Build the per-doctype configuration piecewise for readability.
        prc_out = {
            "lookup": Query("prc-out", "source"),
            "action": {
                "definition": Aggregator("drop"),
                "data": Aggregator({"in": Aggregator("add"),
                                    "out": Aggregator("add"),
                                    "file": Aggregator("cat")}),
                "ls": Aggregator("check"),
                "stream": Aggregator("check"),
                "source": Aggregator("match"),
            },
        }
        prc_in = {
            "lookup": Query("prc-in", "dest"),
            "action": {
                "definition": Aggregator("drop"),
                "data": Aggregator({"out": Aggregator("add")}),
                "ls": Aggregator("check"),
                "index": Aggregator("cat"),
                "source": Aggregator("check"),
                "dest": Aggregator("check"),
                "process": Aggregator("cat"),
            },
        }
        prc_s_state = {
            "lookup": Query("prc-s-state"),
            "action": {
                "macro": Aggregator("histoadd"),
                "mini": Aggregator("histoadd"),
                "micro": Aggregator("histoadd"),
                "tp": Aggregator("add"),
                "lead": Aggregator("avg"),
                "nfiles": Aggregator("add"),
                "ls": Aggregator("check"),
                "process": Aggregator("cat"),
            },
        }
        self.datadict = {
            "prc-out": prc_out,
            "prc-in": prc_in,
            "prc-s-state": prc_s_state,
        }

    def lookup(self, doctype):
        """Return the Query object configured for *doctype*."""
        return self.datadict[doctype]["lookup"]

    def action(self, doctype):
        """Return the field->Aggregator table configured for *doctype*."""
        return self.datadict[doctype]["action"]

    def search(self, ind, doctype, ls, stream=None):
        """Run the doctype's lookup query against index *ind*."""
        build_query = self.lookup(doctype)
        query = build_query(ls, stream) if stream else build_query(ls)
        return self.server.search(query, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Fold every hit's _source fields through the action table.

        Returns {field: aggregated value}; the aggregators are reset
        afterwards so the instance can be reused.
        """
        hits = self.search(ind, doctype, ls, stream)
        actions = self.action(doctype)
        for hit in hits["hits"]["hits"]:
            for field, value in hit["_source"].items():
                actions[field](value)
        collated = dict((field, agg.value()) for field, agg in actions.items())
        for agg in actions.values():
            agg.reset()
        return collated

    def refresh(self, ind):
        """Refresh index *ind* on the server."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Index a single document and return the server response."""
        return self.server.index(ind, doctype, doc)
es = ElasticSearch("http://localhost:9200/") lat = float(sys.argv[1]) lon = float(sys.argv[2]) r = float(sys.argv[3]) print lat, lon, r query = { "from": 0, "size": 10, "query": {"match_all": {}}, "filter": {"geo_distance": {"distance": str(r) + "km", "location": {"lat": lat, "lon": lon}}}, "sort": [{"_geo_distance": {"location": {"lat": lat, "lon": lon}, "order": "asc", "unit": "km"}}], } query1 = {"from": 0, "size": 10, "query": {"match_all": {}}, "sort": [{"likes": {"order": "desc"}}, "_score"]} query_count = {"facets": {"count_by_type": {"terms": {"field": "_type"}}}} # res = es.search(query, index='photo_geos',doc_type=['photos']) res = es.search(query_count, index="geos", doc_type=["user_geos"]) print res sys.exit(0) uids = [ ( r["_id"], r["sort"][0], r["_source"]["views"], r["_source"]["likes"], r["_source"]["location"]["lat"], r["_source"]["location"]["lon"], ) for r in res["hits"]["hits"] ] print len(uids)
class Collation:
    """Collates ES hits per doctype via a table of per-field Aggregators."""

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = self._build_config()

    @staticmethod
    def _build_config():
        """Return the per-doctype lookup/action configuration table."""
        return {
            'prc-out': {
                "lookup": Query('prc-out', 'source'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'in': Aggregator('add'),
                                        'out': Aggregator('add'),
                                        'file': Aggregator('cat')}),
                    'ls': Aggregator('check'),
                    'stream': Aggregator('check'),
                    'source': Aggregator('match'),
                },
            },
            'prc-in': {
                "lookup": Query('prc-in', 'dest'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'out': Aggregator('add')}),
                    'ls': Aggregator('check'),
                    'index': Aggregator('cat'),
                    'source': Aggregator('check'),
                    'dest': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
            'prc-s-state': {
                "lookup": Query('prc-s-state'),
                "action": {
                    'macro': Aggregator('histoadd'),
                    'mini': Aggregator('histoadd'),
                    'micro': Aggregator('histoadd'),
                    'tp': Aggregator('add'),
                    'lead': Aggregator('avg'),
                    'nfiles': Aggregator('add'),
                    'ls': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
        }

    def lookup(self, doctype):
        """Query object for *doctype*."""
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        """Field->Aggregator table for *doctype*."""
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Execute the doctype's lookup query against index *ind*."""
        if stream:
            lookup_query = self.lookup(doctype)(ls, stream)
        else:
            lookup_query = self.lookup(doctype)(ls)
        return self.server.search(lookup_query, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Aggregate all matching documents' fields; reset aggregators after."""
        response = self.search(ind, doctype, ls, stream)
        table = self.action(doctype)
        for hit in response['hits']['hits']:
            for field, value in hit['_source'].items():
                table[field](value)
        result = dict((field, agg.value()) for field, agg in table.items())
        for agg in table.values():
            agg.reset()
        return result

    def refresh(self, ind):
        """Refresh index *ind*."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Index one document; return the server response."""
        return self.server.index(ind, doctype, doc)
class Collation:
    """Per-doctype document collation driven by a declarative config.

    ``datadict`` maps each doctype to a "lookup" Query (query builder) and
    an "action" dict that routes each _source field to its Aggregator.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {}
        # 'prc-out' documents: per-stream input/output accounting.
        self.datadict['prc-out'] = {
            "lookup": Query('prc-out', 'source'),
            "action": {
                'definition': Aggregator('drop'),
                'data': Aggregator({'in': Aggregator('add'),
                                    'out': Aggregator('add'),
                                    'file': Aggregator('cat')}),
                'ls': Aggregator('check'),
                'stream': Aggregator('check'),
                'source': Aggregator('match'),
            },
        }
        # 'prc-in' documents: per-destination input accounting.
        self.datadict['prc-in'] = {
            "lookup": Query('prc-in', 'dest'),
            "action": {
                'definition': Aggregator('drop'),
                'data': Aggregator({'out': Aggregator('add')}),
                'ls': Aggregator('check'),
                'index': Aggregator('cat'),
                'source': Aggregator('check'),
                'dest': Aggregator('check'),
                'process': Aggregator('cat'),
            },
        }
        # 'prc-s-state' documents: histogram/throughput style metrics.
        self.datadict['prc-s-state'] = {
            "lookup": Query('prc-s-state'),
            "action": {
                'macro': Aggregator('histoadd'),
                'mini': Aggregator('histoadd'),
                'micro': Aggregator('histoadd'),
                'tp': Aggregator('add'),
                'lead': Aggregator('avg'),
                'nfiles': Aggregator('add'),
                'ls': Aggregator('check'),
                'process': Aggregator('cat'),
            },
        }

    def lookup(self, doctype):
        """Return the configured Query for *doctype*."""
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        """Return the configured action table for *doctype*."""
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Run the doctype lookup (with optional stream) against *ind*."""
        make = self.lookup(doctype)
        return self.server.search(make(ls, stream) if stream else make(ls),
                                  index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Fold all hits through the action table and return the totals.

        Every aggregator is reset before returning, so successive calls
        start from a clean slate.
        """
        acts = self.action(doctype)
        for hit in self.search(ind, doctype, ls, stream)['hits']['hits']:
            source = hit['_source']
            for key in source:
                acts[key](source[key])
        totals = dict((key, acts[key].value()) for key in acts)
        for key in acts:
            acts[key].reset()
        return totals

    def refresh(self, ind):
        """Ask the server to refresh index *ind*."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Store a single document; return the indexing response."""
        return self.server.index(ind, doctype, doc)