def get_data_from_es(endpoint, index, service, num=20, time=2, query=DEFALULT_QUERY):
    """Get data from elasticsearch using index name.

    Fetches up to *num* entries for *service* logged within the last
    *time* seconds from the given index.

    :param endpoint: Elasticsearch endpoint (host/URL).
    :param index: name of the index to search.
    :param service: value matched against the document "service" field.
    :param num: maximum number of hits to return.
    :param time: look-back window in seconds.
    :param query: query template; defaults to the module-level DEFALULT_QUERY.
    :return: raw Elasticsearch search response dict.
    """
    import copy  # function-scope import: the file-level import block is not visible here

    es = Elasticsearch(endpoint, timeout=30)

    # Deep-copy before mutating: the original code wrote size/range/service
    # into the shared DEFALULT_QUERY default dict, so values from one call
    # leaked into every later call (mutable-default-argument bug). The copy
    # also protects a caller-supplied query from being modified.
    query = copy.deepcopy(query)
    query["size"] = num
    query["filter"]["range"]["@timestamp"]["gte"] = "now-" + str(time) + "s"
    query["query"]["match"]["service"] = service

    return es.search(index, body=json.dumps(query), request_timeout=500)
class ESSearch(object):
    """Query helper over an Elasticsearch index of 'company' documents.

    All searches go through doc_type 'company' on one index and return the
    plain ``_source`` dicts of the matching hits.
    """

    es_client: Elasticsearch
    es_index: str

    def __init__(self, connect=ES_CONNECT, index=ES_INDEX):
        """Connect to Elasticsearch and remember the index to query."""
        self.es_client = Elasticsearch(connect)
        self.es_index = index

    def _search(self, body):
        # Single place for the shared search call (index + doc_type were
        # previously duplicated across all four public methods).
        return self.es_client.search(
            index=self.es_index, doc_type='company', body=body)

    @staticmethod
    def _sources(res):
        # Unwrap the raw response down to the list of _source documents.
        return [company['_source'] for company in res['hits']['hits']]

    def search_by_name(self, search: str):
        """Companies whose company_name phrase-matches *search*."""
        res = self._search(dict(query=dict(match_phrase=dict(company_name=search))))
        return self._sources(res)

    def search_by_location(self, search: str):
        """Companies whose location phrase-matches *search*."""
        res = self._search(dict(query=dict(match_phrase=dict(location=search))))
        return self._sources(res)

    def search_by_id(self, company_id: str):
        """First company matching *company_id*, or {} when there is no hit."""
        res = self._search(dict(query=dict(match=dict(company_id=company_id))))
        hits = res['hits']['hits']
        return hits[0]['_source'] if hits else {}

    def search_by_text(self, text: str):
        """Companies matching the free-form query string *text*."""
        res = self._search(dict(query=dict(query_string=dict(query=text))))
        return self._sources(res)
Beispiel #3
0
    # NOTE(review): fragment — `indexes`, `es`, `get_index` and `logger` are
    # defined outside the visible region, and the scroll loop below appears
    # truncated by extraction (neither `sid` nor `scroll_size` is updated
    # inside the loop body shown here).
    for index_params in indexes:

        try:
            # Resolve the concrete index name from its parameter triple.
            index = get_index(index_params[0], index_params[2],
                              index_params[1])
            logger.info(index)

            # Initialize the request
            query_body = '{"query": {"match_all": {}}}'
            scroll = '1m'  # keep the scroll context alive for one minute
            timeout = 6000
            size = 100  # hits fetched per scroll page

            # First page: runs the match-all query and opens the scroll context.
            page = es.search(index=index,
                             scroll=scroll,
                             size=size,
                             body=query_body,
                             request_timeout=timeout)

            # Init scroll
            sid = page['_scroll_id']
            scroll_size = page['hits']['total']

            # Init data
            data = []
            data.append(page)

            # Start scrolling
            while (scroll_size > 0):
                page = es.scroll(scroll_id=sid, scroll=scroll)
Beispiel #4
0
class ESStorage(Storage):
    """Elasticsearch storage backend."""

    NAME = "es"
    _MESSAGE_FIELD_NAME = "_source.message"

    def __init__(self, configuration):
        """Initialize Elasticsearch storage backend."""
        super(ESStorage, self).__init__(configuration)
        self.config.storage = ESConfiguration()
        self._connect()

    def _connect(self):
        """Create the Elasticsearch client from the configured endpoint."""
        self.es = Elasticsearch(self.config.storage.ES_ENDPOINT,
                                timeout=60,
                                max_retries=2)

    def _prep_index_name(self, prefix):
        """Return *prefix* with today's date (YYYY.MM.DD) appended."""
        # Indices are rotated daily, so the date suffix selects today's index.
        return prefix + datetime.datetime.now().strftime("%Y.%m.%d")

    def retrieve(self, time_range: int, number_of_entires: int):
        """Retrieve data from ES.

        :param time_range: look-back window in seconds.
        :param number_of_entires: maximum number of log entries to fetch.
        :return: tuple of (normalized data, list of raw ``_source`` dicts).
        """
        index_in = self._prep_index_name(self.config.storage.ES_INPUT_INDEX)

        # Build the query directly from the arguments; the original filled in
        # placeholder values ('journal', now-2s, size 20) and immediately
        # overwrote all three, which obscured the real parameters.
        # NOTE(review): a top-level "filter" clause is legacy (pre-2.x) ES
        # query syntax — confirm the target cluster version still accepts it.
        query = {
            'query': {
                'match': {
                    'service': self.config.storage.ES_SERVICE
                }
            },
            "filter": {
                "range": {
                    "@timestamp": {
                        "gte": "now-%ds" % time_range,
                        "lte": "now"
                    }
                }
            },
            'sort': {
                '@timestamp': {
                    'order': 'desc'
                }
            },
            "size": number_of_entires
        }

        _LOGGER.info(
            "Reading in max %d log entries in last %d seconds from %s",
            number_of_entires, time_range, self.config.storage.ES_ENDPOINT)

        es_data = self.es.search(index_in, body=json.dumps(query))

        # only use _source sub-dict
        es_data = [x['_source'] for x in es_data['hits']['hits']]
        es_data_normalized = json_normalize(es_data)

        _LOGGER.info("%d logs loaded in from last %d seconds",
                     len(es_data_normalized), time_range)

        self._preprocess(es_data_normalized)

        return es_data_normalized, es_data  # bad solution, this is how Entry objects could come in.

    def store_results(self, data):
        """Store results back to ES.

        Bulk-indexes each entry of *data* as a 'log' document into today's
        target index.
        """
        index_out = self._prep_index_name(self.config.storage.ES_TARGET_INDEX)

        # Iterate the entries directly instead of indexing via range(len(...)).
        actions = [{
            "_index": index_out,
            "_type": "log",
            "_source": entry
        } for entry in data]

        # chunk_size splits the upload into roughly four bulk requests.
        helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
Beispiel #5
0
#coding:utf-8
from elasticsearch2 import Elasticsearch
from datetime import datetime

es = Elasticsearch(hosts="10.10.6.6")

es.index(index="keti10_10", doc_type="keti10_10", id=3, body={"bdcdyh": "123", "lx": '1',\
 'postDate':'2017-12-30 12:11:06','qx':'北京','records':2,'uuid':'00123dfad','zl':'北京海淀区'})

#doc=es.get(index="keti10_10", doc_type="keti10_10", id=1)['_source']

#print "doc is %s" % doc

res = es.search(index="keti10_10",
                body={"query": {
                    "match_phrase": {
                        "zl": '北京'
                    }
                }})

for hit in res['hits']['hits']:
    hitmap = hit['_source']
    print "%(zl)s %(postDate)s" % hitmap