def get_data_from_es(endpoint, index, service, num=20, time=2, query=DEFALULT_QUERY):
    """Get data from elasticsearch using index name.

    :param endpoint: Elasticsearch endpoint URL/host passed to the client.
    :param index: name of the index to search.
    :param service: value matched against the ``service`` field.
    :param num: maximum number of hits to return (query ``size``).
    :param time: look-back window in seconds (note: shadows the stdlib
        ``time`` module name inside this function; kept for API compatibility).
    :param query: query template; defaults to the module-level DEFALULT_QUERY.
    :return: raw Elasticsearch search response dict.
    """
    import copy  # local import: keep this fix self-contained

    es = Elasticsearch(endpoint, timeout=30)
    # BUG FIX: the original mutated the shared default dict in place, so every
    # call with the default permanently changed DEFALULT_QUERY. Work on a copy.
    query = copy.deepcopy(query)
    query["size"] = num
    query["filter"]["range"]["@timestamp"]["gte"] = "now-" + str(time) + "s"
    query["query"]["match"]["service"] = service
    return es.search(index=index, body=json.dumps(query), request_timeout=500)
class ESSearch(object):
    """Search helper over an Elasticsearch index of company documents.

    All lookups run against the ``company`` doc type of ``es_index``.
    """

    es_client: Elasticsearch
    es_index: str

    def __init__(self, connect=ES_CONNECT, index=ES_INDEX):
        self.es_client = Elasticsearch(connect)
        self.es_index = index

    def _search(self, body):
        # Shared low-level search; returns the raw hit list. Extracted because
        # every public method repeated the same search(...) boilerplate.
        res = self.es_client.search(
            index=self.es_index, doc_type='company', body=body)
        return res['hits']['hits']

    def search_by_name(self, search: str):
        """Return ``_source`` dicts of companies whose name matches *search* as a phrase."""
        hits = self._search(dict(query=dict(match_phrase=dict(company_name=search))))
        return [company['_source'] for company in hits]

    def search_by_location(self, search: str):
        """Return ``_source`` dicts of companies whose location matches *search* as a phrase."""
        hits = self._search(dict(query=dict(match_phrase=dict(location=search))))
        return [company['_source'] for company in hits]

    def search_by_id(self, company_id: str):
        """Return the first company matching *company_id*, or {} if none match."""
        hits = self._search(dict(query=dict(match=dict(company_id=company_id))))
        # EAFP-lite: an empty hit list is falsy; no len() needed.
        return hits[0]['_source'] if hits else {}

    def search_by_text(self, text: str):
        """Return ``_source`` dicts of companies matching a free-text query string."""
        hits = self._search(dict(query=dict(query_string=dict(query=text))))
        return [company['_source'] for company in hits]
# Fetch each configured index with the scroll API.
# NOTE(review): get_index is called with fields (0, 2, 1) — confirm this
# argument order against get_index's signature.
for index_params in indexes:
    # NOTE(review): this try: block is truncated in the visible chunk — the
    # matching except/finally (and the rest of the while body) continue past it.
    try:
        index = get_index(index_params[0], index_params[2], index_params[1])
        logger.info(index)
        # Initialize the request
        query_body = '{"query": {"match_all": {}}}'
        scroll = '1m'        # keep the scroll context alive for 1 minute per page
        timeout = 6000       # per-request timeout in seconds
        size = 100           # hits per scroll page
        page = es.search(index=index, scroll=scroll, size=size, body=query_body, request_timeout=timeout)
        # Init scroll
        sid = page['_scroll_id']
        scroll_size = page['hits']['total']
        # Init data
        data = []
        data.append(page)
        # Start scrolling
        # NOTE(review): within this chunk neither sid nor scroll_size is updated
        # inside the loop — verify the (unseen) remainder of the body refreshes
        # sid from page['_scroll_id'] and recomputes scroll_size from the new
        # page, otherwise this loops forever.
        while (scroll_size > 0):
            page = es.scroll(scroll_id=sid, scroll=scroll)
class ESStorage(Storage):
    """Elasticsearch storage backend."""

    NAME = "es"
    _MESSAGE_FIELD_NAME = "_source.message"

    def __init__(self, configuration):
        """Initialize Elasticsearch storage backend."""
        super(ESStorage, self).__init__(configuration)
        self.config.storage = ESConfiguration()
        self._connect()

    def _connect(self):
        # Single client instance reused for every retrieve/store call.
        self.es = Elasticsearch(self.config.storage.ES_ENDPOINT,
                                timeout=60, max_retries=2)

    def _prep_index_name(self, prefix):
        """Return *prefix* with today's date appended as ``YYYY.MM.DD``.

        Matches the daily-index naming convention (e.g. ``logs-2020.01.31``).
        """
        now = datetime.datetime.now()
        return prefix + now.strftime("%Y.%m.%d")

    def retrieve(self, time_range: int, number_of_entires: int):
        """Retrieve data from ES.

        :param time_range: look-back window in seconds.
        :param number_of_entires: maximum number of log entries to fetch.
        :return: (normalized DataFrame, list of raw ``_source`` dicts).
        """
        index_in = self._prep_index_name(self.config.storage.ES_INPUT_INDEX)
        # Build the query directly from the parameters instead of creating a
        # hard-coded template and immediately overwriting three of its values.
        # NOTE(review): a top-level "filter" clause is ES 2.x-era syntax —
        # confirm the target cluster version accepts it.
        query = {
            "query": {"match": {"service": self.config.storage.ES_SERVICE}},
            "filter": {
                "range": {
                    "@timestamp": {
                        "gte": "now-%ds" % time_range,
                        "lte": "now",
                    }
                }
            },
            "sort": {"@timestamp": {"order": "desc"}},
            "size": number_of_entires,
        }

        _LOGGER.info(
            "Reading in max %d log entries in last %d seconds from %s",
            number_of_entires, time_range, self.config.storage.ES_ENDPOINT)

        es_data = self.es.search(index=index_in, body=json.dumps(query))
        # only use _source sub-dict
        es_data = [hit['_source'] for hit in es_data['hits']['hits']]
        es_data_normalized = json_normalize(es_data)

        _LOGGER.info("%d logs loaded in from last %d seconds",
                     len(es_data_normalized), time_range)

        self._preprocess(es_data_normalized)

        # bad solution, this is how Entry objects could come in.
        return es_data_normalized, es_data

    def store_results(self, data):
        """Store results back to ES.

        :param data: sequence of serializable documents to bulk-index.
        """
        index_out = self._prep_index_name(self.config.storage.ES_TARGET_INDEX)
        # Iterate the documents directly instead of indexing via range(len(...)).
        actions = [{
            "_index": index_out,
            "_type": "log",
            "_source": entry,
        } for entry in data]
        # chunk_size splits the bulk upload into ~4 batches (minimum 1).
        helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
#coding:utf-8 from elasticsearch2 import Elasticsearch from datetime import datetime es = Elasticsearch(hosts="10.10.6.6") es.index(index="keti10_10", doc_type="keti10_10", id=3, body={"bdcdyh": "123", "lx": '1',\ 'postDate':'2017-12-30 12:11:06','qx':'北京','records':2,'uuid':'00123dfad','zl':'北京海淀区'}) #doc=es.get(index="keti10_10", doc_type="keti10_10", id=1)['_source'] #print "doc is %s" % doc res = es.search(index="keti10_10", body={"query": { "match_phrase": { "zl": '北京' } }}) for hit in res['hits']['hits']: hitmap = hit['_source'] print "%(zl)s %(postDate)s" % hitmap