import json
import logging

from elasticsearch import Elasticsearch


def store_book_details(search_book: str, book_dict: dict) -> None:
    elasticsearch = Elasticsearch()

    elasticsearch.index(index='book_details',
                        doc_type='books',
                        id=search_book.lower(),
                        body=json.dumps(book_dict))
    logging.info('Details stored...')
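
A quick hedged usage sketch; it assumes a local cluster on the default port, and the book payload is made up for the example:

# Illustrative call; the payload is a placeholder.
store_book_details('Moby Dick', {'title': 'Moby Dick', 'author': 'Herman Melville'})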
Example no. 2
import time

from elasticsearch import Elasticsearch


def add_test():
    es = Elasticsearch()
    doc = {
        'author': 'kimchy',
        'text': 'Elasticsearch: cool. bonsai cool.',
        'timestamp': int(round(time.time() * 1000)),
        'money': 22
    }
    res = es.index(index="test-index", doc_type='tweet', id=7, body=doc)

    doc = {
        'author': 'tolstoy',
        'text': 'hi',
        'timestamp': int(round(time.time() * 1000)),
        'money': 30
    }
    res = es.index(index="test-index", doc_type='tweet', id=10, body=doc)
    print(res)
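
To verify the two writes, one can fetch a document back by id; a minimal sketch against the same local cluster used by add_test:

# Hedged verification sketch; assumes the default local cluster.
es = Elasticsearch()
add_test()
res = es.get(index="test-index", doc_type='tweet', id=7)
print(res['_source'])  # should echo the 'kimchy' document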
Example no. 3
def read_log(topic):
    consumer_inner = KafkaC("172.16.10.214", 9092, topic, 'log-test')

    message = consumer_inner.consume_data()

    es = Elasticsearch(hosts='elasticsearch-logging.logging.svc.cluster.local')

    for msg in message:
        offset = msg.offset
        print(offset)
        value = msg.value
        value_dic = json.loads(value)
        date_today = datetime.datetime.now().strftime('%Y-%m-%d')
        timestrap = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f+08:00')
        value_dic['timestrap'] = timestrap
        if 'profile' in value_dic:
            print(value_dic)
            index = "java-log-{env}-{date}".format(env=value_dic['profile'].lower(), date=date_today)
            try:
                es.index(index=index, doc_type='javalog', body=value_dic)
            except Exception as e:
                print(e)
                print(value_dic)
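
For reference, the dated index name this builds looks like the following (a minimal sketch with placeholder values):

# e.g. profile='PROD' on 2018-07-06 -> 'java-log-prod-2018-07-06'
index = "java-log-{env}-{date}".format(env='PROD'.lower(), date='2018-07-06')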
Example no. 4
from elasticsearch import Elasticsearch, helpers


class ElasticHelper(object):
    def __init__(self):
        # ElasticConfig is a local settings object from the original project
        self.es = Elasticsearch(ElasticConfig.uri)

    def index(self, body, index, doc_type):
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        return helpers.scan(self.es,
                            query=body,
                            index=index,
                            doc_type=doc_type,
                            preserve_order=True)

    def search(self, body, index, doc_type):
        try:
            rsp = self.es.search(body=body,
                                 index=index,
                                 doc_type=doc_type,
                                 request_timeout=100)
            if rsp.get("errors"):
                print("es search error")
                return
            return rsp
        except Exception as e:
            print("es search error: " + str(e))

    def count(self, body, index, doc_type):
        return self.es.count(index=index,
                             doc_type=doc_type,
                             body=body,
                             request_timeout=100)

    def delete_index(self, index):
        return self.es.indices.delete(index=index)
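
A minimal usage sketch, assuming ElasticConfig.uri points at a reachable cluster; the index, type and query are illustrative:

# Hypothetical usage of ElasticHelper.scan; names are placeholders.
helper = ElasticHelper()
for doc in helper.scan(body={"query": {"match_all": {}}}, index="logs", doc_type="doc"):
    print(doc["_id"], doc["_source"])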
Example no. 5
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth

# ES_HOSTNAME, ES_ACCESS_KEY, ES_SECRET_KEY, REGION, ES_INDEX, ES_DOCTYPE and
# ES_ENDPOINT_ID are defined elsewhere in the original script.


def push_data(data):

  try:
    es = Elasticsearch(
                        hosts=[{'host': ES_HOSTNAME, 'port': 443}],
                        http_auth=AWS4Auth(ES_ACCESS_KEY, ES_SECRET_KEY, REGION, 'es'),
                        use_ssl=True,
                        verify_certs=True,
                        connection_class=RequestsHttpConnection
                        )
    state = es.index(index=ES_INDEX, doc_type=ES_DOCTYPE, id=ES_ENDPOINT_ID, body=data)
    print("POST STATUS: {}".format(state))
    return state
  except Exception as e:
    raise e
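
Pairing AWS4Auth with RequestsHttpConnection signs every request with SigV4, which is what an IAM-protected Amazon Elasticsearch Service domain expects. An illustrative call, assuming the ES_* constants are set:

# Hypothetical invocation; the document body is a placeholder.
push_data({'service': 'checkout', 'status': 'ok'})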
Example no. 6
class EsClientConnection:
    host = ''
    errorMessage = ''

    def __init__(self, host, index=None, type=None, body=None):
        '''
        Both index and type must be given at creation time.
        :param host:
        :param index:
        :param type:
        :param body:
        '''
        self.host = host
        self.conn = Elasticsearch([self.host])
        # Initialize the mapping, i.e. create the index if needed
        indexExists = self.conn.indices.exists(index=index)
        typeExists = self.conn.indices.exists_type(index=index, doc_type=type)
        if body is not None:
            if not indexExists:
                if not typeExists:
                    self.conn.indices.create(index=index, body=body)
                else:
                    self.errorMessage = 'index does not exist but type exists. that is not possible!'
            else:
                if not typeExists:
                    self.errorMessage = 'index exists but type does not exist'
                else:
                    self.errorMessage = 'index and type already exist. no need to create them'

    def __del__(self):
        self.close()

    def check(self):
        '''
        Return info about the current ES cluster.
        :return:
        '''
        return self.conn.info()

    def insertDocument(self, index, type, body, id=None):
        '''
        Insert a single document body into the given index and type; the id is
        optional and will be auto-generated by ES when omitted.
        :param index: target index
        :param type: target type
        :param body: document to insert -> dict
        :param id: custom id value
        :return:
        '''
        return self.conn.index(index=index, doc_type=type, body=body, id=id)

    def insertDataFrame(self, index, type, dataFrame):
        '''
        Bulk insert interface.
        The bulk API expects the body as an interleaved list: [{optionType: {condition}}, {data}, ...]
        where optionType is one of index, delete or update,
        condition may set the index and type of each item,
        and data is the document to insert/update.
        :param index: default index to insert into
        :param type: default type to insert into
        :param dataFrame: data set to insert (pandas DataFrame)
        :return:
        '''
        dataList = dataFrame.to_dict(orient='records')
        insertHeadInfoList = [{"index": {}} for i in range(len(dataList))]
        # Interleave action headers and documents: [header, doc, header, doc, ...]
        temp = [dict] * (len(dataList) * 2)
        temp[::2] = insertHeadInfoList
        temp[1::2] = dataList
        try:
            return self.conn.bulk(index=index, doc_type=type, body=temp)
        except Exception as e:
            return str(e)

    def deleteDocById(self, index, type, id):
        '''
        Delete the document identified by index, type and id.
        :param index:
        :param type:
        :param id:
        :return:
        '''
        return self.conn.delete(index=index, doc_type=type, id=id)

    def deleteDocByQuery(self, index, query, type=None):
        '''
        Delete all documents under the index that match query.
        :param index:
        :param query: a query in DSL syntax
        :param type:
        :return:
        '''
        return self.conn.delete_by_query(index=index,
                                         body=query,
                                         doc_type=type)

    def deleteAllDocByIndex(self, index, type=None):
        '''
        Delete all documents under the given index.
        :param index:
        :return:
        '''
        try:
            query = {'query': {'match_all': {}}}
            return self.conn.delete_by_query(index=index,
                                             body=query,
                                             doc_type=type)
        except Exception as e:
            return str(e) + ' -> ' + index

    def searchDoc(self, index=None, type=None, body=None):
        '''
        Search all matching documents under the index.
        :param index:
        :param type:
        :param body: a filter query in DSL syntax
        :return:
        '''
        return self.conn.search(index=index, doc_type=type, body=body)

    def getDocById(self, index, type, id):
        '''
        Get the document identified by index, type and id.
        :param index:
        :param type:
        :param id:
        :return:
        '''
        return self.conn.get(index=index, doc_type=type, id=id)

    def updateDocById(self, index, type, id, body=None):
        '''
        Update the document identified by index, type and id.
        :param index:
        :param type:
        :param id:
        :param body: values to update
        :return:
        '''
        return self.conn.update(index=index, doc_type=type, id=id, body=body)

    def close(self):
        if self.conn is not None:
            try:
                self.conn.close()
            except Exception as e:
                pass
            finally:
                self.conn = None

    def mysqlToEs(self, mysqlData):
        doc = []
        for value in mysqlData:
            doc.append({"index": {}})
            doc.append(value)
        self.conn.bulk(index='product', doc_type='tour_product', body=doc)
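
A minimal usage sketch for the DataFrame bulk path; the host, index and type names are placeholders:

# Hypothetical usage; assumes a cluster reachable at 127.0.0.1:9200.
import pandas as pd

client = EsClientConnection('127.0.0.1:9200', index='product', type='tour_product')
df = pd.DataFrame([{'name': 'tour A', 'price': 99},
                   {'name': 'tour B', 'price': 120}])
print(client.insertDataFrame(index='product', type='tour_product', dataFrame=df))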
Example no. 7
import json
import sys
from contextlib import contextmanager

from elasticsearch import Elasticsearch


@contextmanager
def _smart_open(filename):
    # Reconstructed opening for this truncated snippet: '-' is taken to mean stdin.
    fh = open(filename) if filename != '-' else sys.stdin
    try:
        yield fh
    finally:
        if filename != '-':
            fh.close()


if __name__ == "__main__":
    if len(sys.argv) > 1:
        args = sys.argv[1]
    else:
        args = '-'

    with _smart_open(args) as handle:
        content = handle.read()

    es = Elasticsearch(
        ['localhost'],
        http_auth=('elastic', 'changeme'))
    all_manifests = json.loads(content)
    print('---- starting! ----')

    for mani in all_manifests['hits']['hits']:
        es.index(
            index='manifests',
            doc_type=mani['_type'],
            id=mani['_id'],
            body=mani['_source'])
        print('{} : {}'.format(mani['_type'], mani['_id']))

    print('---- finished! ----')
Example no. 8
# Fragment: the lines below run inside a loop over ######.txt files;
# 'es', 'counter', 'file' and 'filename' are defined earlier in the original script.
    # Opens the file
    with open(str(filename), 'r', encoding='utf-8') as fd:
        # Reads the first line - files in team_2 have 0 or 1 lines each
        line = fd.readline()
    counter += 1
    package = {
        # Keeps the number from ######.txt
        'rcn': str(file[:-4]),
        # Removes the space at the beginning of the text
        'text': str(line[1:])
    }
    if counter % 100 == 0:
        print("Files processed: " + str(counter))
    # Uploads the package-json to elasticsearch using as id the unique name of the file
    es.index(index='test2',
             doc_type='project',
             id=package['rcn'],
             body=package)
print("Files processed: " + str(counter))
# Closes the indices - changes the similarity to classic TF-IDF - opens the indices
es.indices.close(index='test2')
es.indices.put_settings(
    index='test2',
    body={'index': {
        'similarity': {
            'default': {
                'type': 'classic'
            }
        }
    }})
es.indices.open(index='test2')
time.sleep(1)
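Example no. 9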
#!/usr/bin/env python3
import os

from elasticsearch5 import Elasticsearch

data_dir = "data/documents/"

es = Elasticsearch()
es_options = {"index": "article_test", "doc_type": "article"}

for file in os.listdir(data_dir):
    if not file.endswith(".json"):
        continue
    path = os.path.join(data_dir, file)
    with open(path) as f:
        body = f.read()
    print("Storing document from {}...".format(file))
    es.index(**es_options, body=body)
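
For many files, a single bulk request is usually faster than one index call per document. A hedged sketch using the same client and options, assuming the elasticsearch5 package exposes the standard helpers module:

# Alternative sketch: collect the documents and send one bulk request.
import json

from elasticsearch5 import helpers

actions = []
for file in os.listdir(data_dir):
    if not file.endswith(".json"):
        continue
    with open(os.path.join(data_dir, file)) as f:
        actions.append({
            "_index": es_options["index"],
            "_type": es_options["doc_type"],
            "_source": json.loads(f.read()),
        })
helpers.bulk(es, actions)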
Example no. 10
        print("Pushing to ElasticSearch")
        header = [
            'Name', "Request count", "Min response time", "Max response time",
            "Mean response time", "Std deviation",
            "Response time 50th percentile", "Response time 75th percentile",
            "Response time 95th percentile", "Response time 99th percentile",
            "800 ms < t < 1200 ms", "t < 800 ms", "t > 1200 ms",
            "Failed Percentage", "Reqs/s"
        ]

        template['test_name'] = l1[0]
        for i in range(1, len(header)):
            template['metric'] = header[i]
            template['value'] = int(l1[i])
            res = es.index(index='gatling-' + str(dateNow),
                           doc_type='gatling',
                           body=template)

finalArray.append(arr1)

header = [[
    'Name', "Request count", "Min response time", "Max response time",
    "Mean response time", "Std deviation", "Response time 50th percentile",
    "Response time 75th percentile", "Response time 95th percentile",
    "Response time 99th percentile", "800 ms < t < 1200 ms", "t < 800 ms",
    "t > 1200 ms", "Failed Percentage", "Reqs/s"
]]
whiteRow = [[' ', ' ', ' ', ' ', ' ']]
finalArray.insert(0, header)
my_df = pd.DataFrame()
for i in range(len(finalArray)):
Example no. 11
    consumer_inner = KafkaC("172.16.10.214", 9092, topic, 'log')

    # producer = KafkaP("172.16.10.246", 9092, topic)
    # data = '{"id": "c57efd7b4f8b237690b4c37f624efa7b","url": "http://finance.sina.com.cn/world/gjcj/2018-07-06/doc-ihexfcvk3564898.shtml","content": "123"}'
    # print(data)
    # data_1 = data.encode('utf-8')
    # producer.send_json_data(data_1)

    from elasticsearch5 import Elasticsearch

    message = consumer_inner.consume_data()

    es = Elasticsearch(hosts='elasticsearch-logging.logging.svc.cluster.local')

    for msg in message:
        offset = msg.offset
        print(offset)
        value = msg.value
        value_dic = json.loads(value)
        date_today = datetime.datetime.now().strftime('%Y-%m-%d')
        timestrap = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f+08:00')
        value_dic['timestrap'] = timestrap
        if 'profile' in value_dic:
            index = "java-log-{env}-{date}".format(env=value_dic['profile'].lower(), date=date_today)
            try:
                es.index(index=index, doc_type='javalog', body=value_dic)
            except Exception as e:
                print(e)
                print(value_dic)


Example no. 12
#res = es.get(index="test-index", id=1)
#print(res['_source'])

#es.indices.refresh(index="test-index")
indexes = es.indices.get('*')
#print(indexes)
# Each pass copies up to 1000 hits from every index back into the same index
# under fresh auto-generated ids, so documents are duplicated on every run.
for j in range(0, 10):
    print("value of j is: ", j)
    for i in indexes:
        print(i)
        print(" ")

        res = es.search(index=i,
                        body={
                            "query": {
                                "match_all": {}
                            },
                            "size": 1000
                        })
        #res = es.search(index="fx-testsuite-responses", body={"query": {"match_all": {}}, "size": 1000})
        #print((res))
        #print("Got %d Hits:" % res['hits']['total']['value'])

        for hit in res['hits']['hits']:
            #print("Hello")
            #print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])
            #es.index(index="fx-testsuite-responses",body={hit})
            a = hit["_source"]
            e = es.index(index=i, doc_type="test", body=a)
            #e = es.index(index="fx-testsuite-responses",doc_type="test" ,body=a)
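
If the intent is to copy documents between indices rather than duplicate them in place, the client's bulk-based reindex helper does it in one call; a hedged sketch with illustrative index names:

# Sketch: copy documents from one index to another (elasticsearch-py helpers).
from elasticsearch import helpers

helpers.reindex(es, source_index="fx-testsuite-responses",
                target_index="fx-testsuite-responses-copy")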
Example no. 13
class ElasticHelper(object):
    def __init__(self):
        self.es = Elasticsearch(ElasticConfig.uri)
        self._multi_search_results = []
        self.bulk_task_queue = []
        self.bulk_last_time = datetime_now_obj()

    def delay_index(self, body, index, doc_type):
        self.bulk_task_queue.append(
            {"index": {
                "_index": index,
                "_type": doc_type
            }})
        self.bulk_task_queue.append(body)

        if self._can_do_bulk():
            self.bulk(body=self.bulk_task_queue,
                      index=index,
                      doc_type=doc_type)
            self.bulk_task_queue = []

        self.bulk_last_time = datetime_now_obj()

    def _can_do_bulk(self):
        # the task queue holds more than 100 entries
        if len(self.bulk_task_queue) > 100:
            return True
        # more than one minute has passed since the last bulk
        if get_n_min_ago(1) > self.bulk_last_time:
            return True
        return False

    def index(self, body, index, doc_type):
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        return helpers.scan(self.es,
                            query=body,
                            index=index,
                            doc_type=doc_type,
                            preserve_order=True)

    def search(self, body, index, doc_type):
        try:
            rsp = self.es.search(body=body,
                                 index=index,
                                 doc_type=doc_type,
                                 request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            print(body)
            logger.error("es search error: " + str(e) + index)

    def count(self, body, index, doc_type):
        return self.es.count(index=index,
                             doc_type=doc_type,
                             body=body,
                             request_timeout=100)

    def delete_index(self, index):
        return self.es.indices.delete(index=index)

    def put_template(self, name, body, **kwargs):
        return self.es.indices.put_template(name=name, body=body, **kwargs)

    def exists_template(self, name, **kwargs) -> bool:
        return self.es.indices.exists_template(name=name, **kwargs)

    def delete_template(self, name, **kwargs):
        return self.es.indices.delete_template(name=name, **kwargs)

    def get_template(self, name, **kwargs):
        return self.es.indices.get_template(name=name, **kwargs)

    def wait_log_in_database(self, computer_name, record_number):
        """
            Queue consumption and ES indexing happen separately, so a log entry
            may be consumed before ES has indexed it; poll here until the entry
            is searchable.
        """
        count = 0
        query = {
            "query": get_must_statement(
                get_term_statement("computer_name", computer_name),
                get_term_statement("record_number", record_number)),
            "_source": False,
            "size": 1
        }
        while True:
            try:
                rsp = self.es.search(body=query,
                                     index=ElasticConfig.event_log_index,
                                     doc_type=ElasticConfig.event_log_doc_type,
                                     request_timeout=100)
                if rsp.get("error"):
                    logger.error(rsp.get("error").get("reason"))
                    break
                if len(rsp["hits"]["hits"]) > 0:
                    return rsp["hits"]["hits"][0]["_id"]
                time.sleep(2)
                # wait at most 10 rounds, i.e. 2 * 10 = 20 seconds
                if count == 10:
                    break
                count += 1
            except Exception as e:
                logger.error("es wait_log_in_database search error: " + str(e))
                break

    def multi_search(self, body, index, doc_type):
        try:
            rsp = self.es.msearch(body=body,
                                  index=index,
                                  doc_type=doc_type,
                                  request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            logger.error("es msearch error: " + str(e))