def store_book_details(search_book: str, book_dict: dict) -> None:
    """Store *book_dict* in the 'book_details' index, keyed by the
    lower-cased search term, then log completion."""
    client = Elasticsearch()
    client.index(
        index='book_details',
        doc_type='books',
        id=search_book.lower(),
        body=json.dumps(book_dict),
    )
    logging.info('Details stored...')
def add_test():
    """Index two sample tweet documents into 'test-index' and print the
    response of the second index call."""
    es = Elasticsearch()
    doc = {
        'author': 'kimchy',
        'text': 'Elasticsearch: cool. bonsai cool.',
        'timestamp': int(round(time.time() * 1000)),
        'money': 22
    }
    res = es.index(index="test-index", doc_type='tweet', id=7, body=doc)
    doc = {
        'author': 'tolstoy',
        'text': 'hi',
        'timestamp': int(round(time.time() * 1000)),
        'money': 30
    }
    res = es.index(index="test-index", doc_type='tweet', id=10, body=doc)
    # BUG FIX: original used Python 2 statement syntax `print res`;
    # this file targets Python 3 (see the python3 shebang elsewhere).
    print(res)
def read_log(topic):
    """Consume JSON log records from Kafka topic *topic* and index each one
    into a per-environment, per-day index (java-log-<env>-<YYYY-MM-DD>).

    :param topic: Kafka topic name to consume from
    """
    consumer_inner = KafkaC("172.16.10.214", 9092, topic, 'log-test')
    message = consumer_inner.consume_data()
    es = Elasticsearch(hosts='elasticsearch-logging.logging.svc.cluster.local')
    for msg in message:
        offset = msg.offset
        print(offset)
        value = msg.value
        value_dic = json.loads(value)
        date_today = datetime.datetime.now().strftime('%Y-%m-%d')
        # Timestamp carries an explicit +08:00 (CST) offset.
        timestrap = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f+08:00')
        value_dic['timestrap'] = timestrap
        # Only records carrying a 'profile' (environment) field are indexed.
        if 'profile' in value_dic:
            print(value_dic)
            index = "java-log-{env}-{date}".format(
                env=value_dic['profile'].lower(), date=date_today)
            try:
                es.index(index=index, doc_type='javalog', body=value_dic)
            except Exception as e:
                # BUG FIX: the original discarded the exception and printed
                # only the document; surface the error as well so indexing
                # failures are diagnosable.
                print(e)
                print(value_dic)
class ElasticHelper(object):
    """Thin wrapper around an Elasticsearch client configured from
    ElasticConfig.uri."""

    def __init__(self):
        self.es = Elasticsearch(ElasticConfig.uri)

    def index(self, body, index, doc_type):
        """Index a single document."""
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        """Submit a pre-built bulk action list."""
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        """Return a scan/scroll iterator over all hits for *body*."""
        return helpers.scan(self.es, query=body, index=index,
                            doc_type=doc_type, preserve_order=True)

    def search(self, body, index, doc_type):
        """Run a search; return the raw response, or None on error."""
        try:
            rsp = self.es.search(body=body, index=index, doc_type=doc_type,
                                 request_timeout=100)
            # BUG FIX: a search response reports failures under "error";
            # "errors" only appears in _bulk responses. The sibling
            # ElasticHelper class in this file already checks "error".
            if rsp.get("error"):
                print("es search error")
                return
            return rsp
        except Exception as e:
            print("es search error: " + str(e))

    def count(self, body, index, doc_type):
        """Return the hit count for *body*."""
        return self.es.count(index=index, doc_type=doc_type, body=body,
                             request_timeout=100)

    def delete_index(self, index):
        """Delete an entire index."""
        return self.es.indices.delete(index=index)
def push_data(data):
    """Index *data* into an AWS-hosted Elasticsearch domain over HTTPS with
    SigV4 request signing.

    :param data: document body to index
    :return: the index-API response
    :raises: any exception from the client propagates to the caller
    """
    # BUG FIX: removed the useless `except Exception as e: raise e` wrapper —
    # it only re-raised from a new frame; errors now propagate with their
    # original traceback intact.
    es = Elasticsearch(
        hosts=[{'host': ES_HOSTNAME, 'port': 443}],
        http_auth=AWS4Auth(ES_ACCESS_KEY, ES_SECRET_KEY, REGION, 'es'),
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection
    )
    state = es.index(index=ES_INDEX, doc_type=ES_DOCTYPE,
                     id=ES_ENDPOINT_ID, body=data)
    print("POST STATUS: {}".format(state))
    return state
class EsClientConnection:
    """Convenience wrapper around an Elasticsearch connection.

    On construction, the target index can be created from a mapping *body*
    when neither the index nor the document type exists yet.
    """
    host = ''
    errorMessage = ''

    def __init__(self, host, index=None, type=None, body=None):
        """Connect to *host* and optionally create *index*.

        Both index and type must be considered together when creating.
        :param host: Elasticsearch host address
        :param index: index name to check / create
        :param type: document type to check
        :param body: index settings/mappings; when not None the index is
                     created if it does not exist yet
        """
        self.host = host
        self.conn = Elasticsearch([self.host])
        # Initialise the mapping (i.e. create the index) only when missing.
        indexExists = self.conn.indices.exists(index=index)
        typeExists = self.conn.indices.exists_type(index=index, doc_type=type)
        if body is not None:
            if not indexExists:
                if not typeExists:
                    self.conn.indices.create(index=index, body=body)
                else:
                    self.errorMessage = 'index not exists and type exists. it is not possible!'
            else:
                if not typeExists:
                    # BUG FIX: message previously read 'index index exists...'
                    # (duplicated word).
                    self.errorMessage = 'index exists and type not exists'
                else:
                    self.errorMessage = 'index exists and type exists. you not need create it'

    def __del__(self):
        self.close()

    def check(self):
        """Return cluster/server info for the current connection."""
        return self.conn.info()

    def insertDocument(self, index, type, body, id=None):
        """Insert one document *body* under *index*/*type*.

        :param index: target index name
        :param type: target document type
        :param body: document to insert (dict)
        :param id: optional explicit id; when None, ES generates one
        """
        return self.conn.index(index=index, doc_type=type, body=body, id=id)

    def insertDataFrame(self, index, type, dataFrame):
        """Bulk-insert a pandas DataFrame, one 'index' action per row.

        The bulk payload alternates action headers and row dicts:
        [{"index": {}}, row0, {"index": {}}, row1, ...].

        :param index: default index for the bulk request
        :param type: default doc type for the bulk request
        :param dataFrame: rows to insert
        :return: the bulk response, or the error text on failure
        """
        dataList = dataFrame.to_dict(orient='records')
        # Build the interleaved action/data list directly instead of
        # pre-sizing a list with the `dict` class as a placeholder.
        temp = []
        for record in dataList:
            temp.append({"index": {}})
            temp.append(record)
        try:
            return self.conn.bulk(index=index, doc_type=type, body=temp)
        except Exception as e:
            return str(e)

    def deleteDocById(self, index, type, id):
        """Delete the document identified by *index*/*type*/*id*."""
        return self.conn.delete(index=index, doc_type=type, id=id)

    def deleteDocByQuery(self, index, query, type=None):
        """Delete every document under *index* matching the DSL *query*."""
        return self.conn.delete_by_query(index=index, body=query, doc_type=type)

    def deleteAllDocByIndex(self, index, type=None):
        """Delete all documents under *index* via a match_all delete-by-query.

        :return: the response, or "<error> -> <index>" text on failure
        """
        try:
            query = {'query': {'match_all': {}}}
            return self.conn.delete_by_query(index=index, body=query, doc_type=type)
        except Exception as e:
            return str(e) + ' -> ' + index

    def searchDoc(self, index=None, type=None, body=None):
        """Search *index*/*type* with the DSL statement *body*."""
        return self.conn.search(index=index, doc_type=type, body=body)

    def getDocById(self, index, type, id):
        """Fetch the document identified by *index*/*type*/*id*."""
        return self.conn.get(index=index, doc_type=type, id=id)

    def updateDocById(self, index, type, id, body=None):
        """Update the document identified by *index*/*type*/*id* with *body*."""
        return self.conn.update(index=index, doc_type=type, id=id, body=body)

    def close(self):
        """Release the underlying connection; safe to call repeatedly."""
        if self.conn is not None:
            try:
                self.conn.close()
            except Exception:
                # Best-effort close; the reference is dropped regardless.
                pass
            finally:
                self.conn = None

    def mysqlToEs(self, mysqlData):
        """Bulk-insert rows exported from MySQL into product/tour_product."""
        doc = []
        for value in mysqlData:
            doc.append({"index": {}})
            doc.append(value)
        self.conn.bulk(index='product', doc_type='tour_product', body=doc)
yield fh finally: if filename is not '-': fh.close() if __name__ == "__main__": if len(sys.argv) > 1: args = sys.argv[1] else: args = '-' with _smart_open(args) as handle: content = handle.read() es = Elasticsearch( ['localhost'], http_auth=('elastic', 'changeme')) all_manifests = json.loads(content) print('---- starting! ----') for mani in all_manifests['hits']['hits']: es.index( index='manifests', doc_type=mani['_type'], id=mani['_id'], body=mani['_source']) print('{} : {}'.format(mani['_type'], mani['_id'])) print('---- finished! ----')
# Opens the file with open(str(filename), 'r', encoding='utf-8') as fd: # Reads the first line - files in team_2 have 0 or 1 lines each line = fd.readline() counter += 1 package = { # Keeps the number from ######.txt 'rcn': str(file[:-4]), # Removes the space at the beginning of the text 'text': str(line[1:]) } if counter % 100 == 0: print("Files processed: " + str(counter)) # Uploads the package-json to elasticsearch using as id the unique name of the file es.index(index='test2', doc_type='project', id=package['rcn'], body=package) print("Files processed: " + str(counter)) # Closes the indices - changes the settings to TF-IDF - opens the indices es.indices.close(index='test2') es.indices.put_settings( index='test2', body={'index': { 'similarity': { 'default': { 'type': 'classic' } } }}) es.indices.open(index='test2') time.sleep(1)
#!/usr/bin/env python3
"""Bulk-load every JSON file in data/documents/ into the article test index."""
import os

from elasticsearch5 import Elasticsearch

data_dir = "data/documents/"

es = Elasticsearch()
es_options = {"index": "article_test", "doc_type": "article"}

# Index each *.json file in the data directory as one document.
json_files = (name for name in os.listdir(data_dir) if name.endswith(".json"))
for file in json_files:
    path = os.path.join(data_dir, file)
    with open(path) as f:
        body = f.read()
    print("Storing document from {}...".format(file))
    es.index(**es_options, body=body)
print("Pushing to ElasticSearch")
# Column order of the per-request Gatling statistics row `l1`.
header = [
    'Name', "Request count", "Min response time", "Max response time",
    "Mean response time", "Std deviation", "Response time 50th percentile",
    "Response time 75th percentile", "Response time 95th percentile",
    "Response time 99th percentile", "800 ms < t < 1200 ms", "t < 800 ms",
    "t > 1200 ms", "Failed Percentage", "Reqs/s"
]
# NOTE(review): `template`, `l1`, `es`, `dateNow`, `finalArray` and `arr1`
# come from the enclosing scope (likely a per-request loop) — confirm.
template['test_name'] = l1[0]
# Index one document per metric column (header[0] is the request name).
for i in range(1, len(header)):
    template['metric'] = header[i]
    # NOTE(review): int() raises on fractional values (e.g. "Reqs/s") —
    # confirm the source rows are always integral.
    template['value'] = int(l1[i])
    res = es.index(index='gatling-' + str(dateNow), doc_type='gatling',
                   body=template)
finalArray.append(arr1)
# Spreadsheet header row, prepended to finalArray before export.
header = [[
    'Name', "Request count", "Min response time", "Max response time",
    "Mean response time", "Std deviation", "Response time 50th percentile",
    "Response time 75th percentile", "Response time 95th percentile",
    "Response time 99th percentile", "800 ms < t < 1200 ms", "t < 800 ms",
    "t > 1200 ms", "Failed Percentage", "Reqs/s"
]]
whiteRow = [[' ', ' ', ' ', ' ', ' ']]
finalArray.insert(0, header)
my_df = pd.DataFrame()
# NOTE(review): this loop's body continues beyond the visible chunk.
for i in range(len(finalArray)):
consumer_inner = KafkaC("172.16.10.214", 9092, topic, 'log') # producer = KafkaP("172.16.10.246", 9092, topic) # data = '{"id": "c57efd7b4f8b237690b4c37f624efa7b","url": "http://finance.sina.com.cn/world/gjcj/2018-07-06/doc-ihexfcvk3564898.shtml","content": "123"}' # print(data) # data_1 = data.encode('utf-8') # producer.send_json_data(data_1) from elasticsearch5 import Elasticsearch message = consumer_inner.consume_data() es = Elasticsearch(hosts='elasticsearch-logging.logging.svc.cluster.local') for msg in message: offset = msg.offset print(offset) value = msg.value value_dic = json.loads(value) date_today = datetime.datetime.now().strftime('%Y-%m-%d') timestrap = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f+08:00') value_dic['timestrap'] = timestrap if 'profile' in value_dic: index = "java-log-{env}-{date}".format(env=value_dic['profile'].lower(), date=date_today) try: es.index(index=index, doc_type='javalog', body=value_dic) except Exception as e: print(value_dic)
#res = es.get(index="test-index", id=1) #print(res['_source']) #es.indices.refresh(index="test-index") indexes = es.indices.get('*') #print(indexes) for j in range(0, 10): print("value of j is: ", j) for i in indexes: print(i) print(" ") res = es.search(index=i, body={ "query": { "match_all": {} }, "size": 1000 }) #res = es.search(index="fx-testsuite-responses", body={"query": {"match_all": {}}, "size": 1000}) #print((res)) #print("Got %d Hits:" % res['hits']['total']['value']) for hit in res['hits']['hits']: #print("Hello") #print("%(timestamp)s %(author)s: %(text)s" % hit["_source"]) #es.index(index="fx-testsuite-responses",body={hit}) a = hit["_source"] e = es.index(index=i, doc_type="test", body=a) #e = es.index(index="fx-testsuite-responses",doc_type="test" ,body=a)
class ElasticHelper(object):
    """Elasticsearch helper with queued (delayed) bulk indexing, search
    wrappers and template management; connection from ElasticConfig.uri."""

    def __init__(self):
        self.es = Elasticsearch(ElasticConfig.uri)
        self._multi_search_results = []
        # Pending bulk lines: alternating action headers and documents.
        self.bulk_task_queue = []
        self.bulk_last_time = datetime_now_obj()

    def delay_index(self, body, index, doc_type):
        """Queue *body* for bulk indexing; flush when the queue grows large
        enough or enough time has passed (see _can_do_bulk)."""
        self.bulk_task_queue.append(
            {"index": {
                "_index": index,
                "_type": doc_type
            }})
        self.bulk_task_queue.append(body)
        if self._can_do_bulk():
            self.bulk(body=self.bulk_task_queue, index=index, doc_type=doc_type)
            self.bulk_task_queue = []
            self.bulk_last_time = datetime_now_obj()

    def _can_do_bulk(self):
        # Flush when more than 100 queued bulk lines have accumulated.
        # BUG FIX (comment): the original comment claimed "100 documents",
        # but each document contributes two queue entries (header + source),
        # so this triggers after ~50 documents.
        if len(self.bulk_task_queue) > 100:
            return True
        # ... or when more than one minute has passed since the last flush.
        if get_n_min_ago(1) > self.bulk_last_time:
            return True
        return False

    def index(self, body, index, doc_type):
        """Index a single document immediately."""
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        """Submit a pre-built bulk action list."""
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        """Return a scan/scroll iterator over all hits for *body*."""
        return helpers.scan(self.es, query=body, index=index,
                            doc_type=doc_type, preserve_order=True)

    def search(self, body, index, doc_type):
        """Run a search; return the raw response, or None on error."""
        try:
            rsp = self.es.search(body=body, index=index, doc_type=doc_type,
                                 request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            print(body)
            logger.error("es search error: " + str(e) + index)

    def count(self, body, index, doc_type):
        """Return the hit count for *body*."""
        return self.es.count(index=index, doc_type=doc_type, body=body,
                             request_timeout=100)

    def delete_index(self, index):
        """Delete an entire index."""
        return self.es.indices.delete(index=index)

    def put_template(self, name, body, **kwargs):
        """Create or update an index template."""
        return self.es.indices.put_template(name=name, body=body, **kwargs)

    def exists_template(self, name, **kwargs) -> bool:
        """Return True when the named index template exists."""
        return self.es.indices.exists_template(name=name, **kwargs)

    def delete_template(self, name, **kwargs):
        """Delete the named index template."""
        return self.es.indices.delete_template(name=name, **kwargs)

    def get_template(self, name, **kwargs):
        """Fetch the named index template."""
        return self.es.indices.get_template(name=name, **kwargs)

    def wait_log_in_database(self, computer_name, record_number):
        """Poll until the event log identified by (computer_name,
        record_number) is searchable in ES and return its _id.

        Message consumption and ES ingestion run separately, so a freshly
        consumed log may not be searchable yet; this blocks until it appears.
        Returns None on search error or timeout.
        """
        count = 0
        query = {
            "query": get_must_statement(
                get_term_statement("computer_name", computer_name),
                get_term_statement("record_number", record_number)),
            "_source": False,
            "size": 1
        }
        while True:
            try:
                rsp = self.es.search(body=query,
                                     index=ElasticConfig.event_log_index,
                                     doc_type=ElasticConfig.event_log_doc_type,
                                     request_timeout=100)
                if rsp.get("error"):
                    logger.error(rsp.get("error").get("reason"))
                    break
                if len(rsp["hits"]["hits"]) > 0:
                    return rsp["hits"]["hits"][0]["_id"]
                time.sleep(2)
                # BUG FIX (comment): poll at most 10 times, i.e. 2 s * 10 =
                # 20 s total (the original comment claimed 5 tries / 10 s,
                # but the code lets `count` reach 10).
                if count == 10:
                    break
                count += 1
            except Exception as e:
                logger.error("es wait_log_in_database search error: " + str(e))
                break

    def multi_search(self, body, index, doc_type):
        """msearch wrapper; return the raw response, or None on error."""
        try:
            rsp = self.es.msearch(body=body, index=index, doc_type=doc_type,
                                  request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            logger.error("es msearch error: " + str(e))