def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'): # try to connect with ES and delete the index es = ElasticSearch('http://localhost:9200/') ## uncomment the following code to prompt check # print "Will delete all the doc in the [index:type] from ElasticSearch:" # print index_name, ":", doc_type # confirm = raw_input("Sure?(y/n)") # if confirm!="y": # sys.exit(0) try: create_es() except Exception as e: print "Error", e else: print index_name, ":", doc_type, " deleted!" # initializing the documents documents = [] for record in self.userGeos: doc = { 'uid': int(record[0]), 'location': { 'lat': record[1], 'lon': record[2] } } documents.append(doc) print "Bulk indexing", len(documents), "documents.." es.bulk_index(index_name, doc_type, documents, id_field='uid') es.refresh(index_name) # test usage print "results from ES," query = {"from": 0, "size": 2000, 'query': {"match_all": {}}} res = es.search(query, index=index_name) print len(res['hits']['hits']), "documents found" print "sample result" print res['hits']['hits'][0]
def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'): # try to connect with ES and delete the index es = ElasticSearch('http://localhost:9200/') ## uncomment the following code to prompt check # print "Will delete all the doc in the [index:type] from ElasticSearch:" # print index_name, ":", doc_type # confirm = raw_input("Sure?(y/n)") # if confirm!="y": # sys.exit(0) try: create_es() except Exception as e: print "Error", e else: print index_name,":", doc_type," deleted!" # initializing the documents documents = [] for record in self.userGeos: doc = {'uid':int(record[0]), 'location':{'lat':record[1],'lon':record[2]}} documents.append(doc) print "Bulk indexing", len(documents),"documents.." es.bulk_index(index_name, doc_type, documents, id_field='uid') es.refresh(index_name) # test usage print "results from ES," query = { "from" : 0, "size" : 2000, 'query': { "match_all" : { } } } res = es.search(query, index=index_name) print len(res['hits']['hits']), "documents found" print "sample result" print res['hits']['hits'][0]
class Collation:
    """Collate per-field statistics from ElasticSearch documents.

    `datadict` drives everything: for each known doctype it holds the Query
    used to look documents up and a field-name -> Aggregator table applied
    to every hit's `_source` fields.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            'prc-out': {
                "lookup": Query('prc-out', 'source'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({
                        'in': Aggregator('add'),
                        'out': Aggregator('add'),
                        'file': Aggregator('cat'),
                    }),
                    'ls': Aggregator('check'),
                    'stream': Aggregator('check'),
                    'source': Aggregator('match'),
                },
            },
            'prc-in': {
                "lookup": Query('prc-in', 'dest'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'out': Aggregator('add')}),
                    'ls': Aggregator('check'),
                    'index': Aggregator('cat'),
                    'source': Aggregator('check'),
                    'dest': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
            'prc-s-state': {
                "lookup": Query('prc-s-state'),
                "action": {
                    'macro': Aggregator('histoadd'),
                    'mini': Aggregator('histoadd'),
                    'micro': Aggregator('histoadd'),
                    'tp': Aggregator('add'),
                    'lead': Aggregator('avg'),
                    'nfiles': Aggregator('add'),
                    'ls': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
        }

    def lookup(self, doctype):
        # Query object used to find documents of this type.
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        # Field-name -> Aggregator table for this type.
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Run the doctype's lookup query on index `ind` (stream optional)."""
        args = (ls, stream) if stream else (ls,)
        return self.server.search(self.lookup(doctype)(*args), index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Fold every matching hit's fields through the aggregators.

        Returns {field: aggregated value}; aggregator state is reset
        afterwards so the instance can be reused for the next call.
        """
        found = self.search(ind, doctype, ls, stream)
        actions = self.action(doctype)
        for hit in found['hits']['hits']:
            for field, value in hit['_source'].items():
                actions[field](value)
        collated = dict((field, agg.value()) for field, agg in actions.items())
        for agg in actions.values():
            agg.reset()
        return collated

    def refresh(self, ind):
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        return self.server.index(ind, doctype, doc)
class Collation:
    """Aggregate fields of ElasticSearch documents, keyed by document type.

    Each entry of `datadict` pairs a lookup Query with an action table
    mapping field names to Aggregator callables.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        # Action tables are built as named locals for readability, then
        # assembled into the dispatch dictionary below.
        prc_out_actions = {
            'definition': Aggregator('drop'),
            'data': Aggregator({
                'in': Aggregator('add'),
                'out': Aggregator('add'),
                'file': Aggregator('cat'),
            }),
            'ls': Aggregator('check'),
            'stream': Aggregator('check'),
            'source': Aggregator('match'),
        }
        prc_in_actions = {
            'definition': Aggregator('drop'),
            'data': Aggregator({'out': Aggregator('add')}),
            'ls': Aggregator('check'),
            'index': Aggregator('cat'),
            'source': Aggregator('check'),
            'dest': Aggregator('check'),
            'process': Aggregator('cat'),
        }
        prc_s_state_actions = {
            'macro': Aggregator('histoadd'),
            'mini': Aggregator('histoadd'),
            'micro': Aggregator('histoadd'),
            'tp': Aggregator('add'),
            'lead': Aggregator('avg'),
            'nfiles': Aggregator('add'),
            'ls': Aggregator('check'),
            'process': Aggregator('cat'),
        }
        self.datadict = {
            'prc-out': {"lookup": Query('prc-out', 'source'), "action": prc_out_actions},
            'prc-in': {"lookup": Query('prc-in', 'dest'), "action": prc_in_actions},
            'prc-s-state': {"lookup": Query('prc-s-state'), "action": prc_s_state_actions},
        }

    def lookup(self, doctype):
        # The Query for this document type.
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        # The field -> Aggregator table for this document type.
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Execute the doctype's lookup Query against index `ind`."""
        query_maker = self.lookup(doctype)
        if stream:
            query = query_maker(ls, stream)
        else:
            query = query_maker(ls)
        return self.server.search(query, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Apply each hit's fields to the aggregators and return the totals.

        Aggregators are reset before returning so repeated calls start clean.
        """
        response = self.search(ind, doctype, ls, stream)
        table = self.action(doctype)
        for hit in response['hits']['hits']:
            for name, val in hit['_source'].items():
                table[name](val)
        summary = dict((name, agg.value()) for name, agg in table.items())
        for agg in table.values():
            agg.reset()
        return summary

    def refresh(self, ind):
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        return self.server.index(ind, doctype, doc)
class Collation:
    """Collates ElasticSearch hits into per-field aggregate values.

    The `datadict` table maps each supported doctype to a lookup Query and
    a dictionary of per-field Aggregators consumed by collate().
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            'prc-out': {
                "lookup": Query('prc-out', 'source'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({
                        'in': Aggregator('add'),
                        'out': Aggregator('add'),
                        'file': Aggregator('cat'),
                    }),
                    'ls': Aggregator('check'),
                    'stream': Aggregator('check'),
                    'source': Aggregator('match'),
                },
            },
            'prc-in': {
                "lookup": Query('prc-in', 'dest'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'out': Aggregator('add')}),
                    'ls': Aggregator('check'),
                    'index': Aggregator('cat'),
                    'source': Aggregator('check'),
                    'dest': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
            'prc-s-state': {
                "lookup": Query('prc-s-state'),
                "action": {
                    'macro': Aggregator('histoadd'),
                    'mini': Aggregator('histoadd'),
                    'micro': Aggregator('histoadd'),
                    'tp': Aggregator('add'),
                    'lead': Aggregator('avg'),
                    'nfiles': Aggregator('add'),
                    'ls': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
        }

    def lookup(self, doctype):
        """Return the lookup Query registered for `doctype`."""
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        """Return the field -> Aggregator mapping registered for `doctype`."""
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Search index `ind` with the doctype's Query built from ls/stream."""
        if stream:
            built = self.lookup(doctype)(ls, stream)
        else:
            built = self.lookup(doctype)(ls)
        return self.server.search(built, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Aggregate all hit fields, return the totals, and reset state."""
        hits = self.search(ind, doctype, ls, stream)['hits']['hits']
        for hit in hits:
            for key, val in hit['_source'].items():
                self.action(doctype)[key](val)
        totals = dict((key, agg.value()) for key, agg in self.action(doctype).items())
        for agg in self.action(doctype).values():
            agg.reset()
        return totals

    def refresh(self, ind):
        """Refresh index `ind` so recent writes become searchable."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Index a single document and return the server's response."""
        return self.server.index(ind, doctype, doc)