def get(self, loop=None):
    # Lazily create the shared client; default the settings lookup to {} so a
    # missing "connection_settings" key doesn't raise on ** expansion.
    if self._conn is None:
        self._conn = Elasticsearch(
            loop=loop,
            **app_settings.get("elasticsearch", {}).get("connection_settings", {}),
        )
    return self._conn
async def setSingleCache(self, cacheModel: EsCacheModel) -> bool:
    """Write a single cache entry.

    classStr: class name
    phoneStr: phone number
    dataType: cache data type
    month: date (format: yyyyMM)
    page: page number
    maxPages: total page count
    dataItems: cached data
    Returns: whether the write succeeded
    """
    _item = cacheModel
    try:
        async with Elasticsearch([{
            'host': config[ELK.ELK_HOST.value],
            'port': config[ELK.ELK_PORT.value],
            'timeout': 360000,
        }]) as es:
            await es.index(index=self.esindex, body=_item.getDict(),
                           doc_type=self.estype, id=_item.getId())
        return True
    except Exception:
        traceback.print_exc()
        return False
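# Hedged usage sketch for setSingleCache above. How EsCacheModel is populated
# is an assumption (field names are borrowed from getAllCache further down),
# and FLAG.CALL_RECORD is a hypothetical enum member.
async def demo_set_single_cache(cache):
    model = EsCacheModel()
    model.apiClass = "CallRecordApi"   # hypothetical API class name
    model.phoneNo = "13800000000"      # hypothetical phone number
    model.dataType = FLAG.CALL_RECORD.value
    model.month = "202001"             # yyyyMM, per the docstring
    model.page = 1
    model.maxPages = 1
    model.items = []
    ok = await cache.setSingleCache(model)
    print("cache write succeeded:", ok)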
async def add_elk(token, date_item, index=config.ELK_INDEX, doc_type='dataitem'):
    try:
        date_item.update({
            '@timestamp': (datetime.datetime.now() + relativedelta(hours=-8))
            .strftime('%Y-%m-%dT%H:%M:%S.000Z')
        })
        async with Elasticsearch([{
            'host': config.ELK_HOST,
            'port': config.ELK_PORT,
            'http_auth': (config[ELK.USR.value], config[ELK.PWD.value]),
            'timeout': 360000,
        }]) as es_client:
            await es_client.index(index=index, body=date_item, doc_type=doc_type)
    except Exception:
        import traceback
        print(traceback.format_exc())
        await log(token, status_code=diy.DIY_STATUS.value,
                  message='error while writing to ELK', iselk=False)
async def search(request):
    es = Elasticsearch()
    q = request.query.get('q')
    try:
        limit = int(request.query.get('limit', 0))
        offset = int(request.query.get('offset', 0))
    except ValueError:
        return json_response({'response': 'wrong query'})
    body = {}
    if q:
        body['query'] = {'match': {'text': q}}
    async with Scan(
        es,
        index=index_name,
        doc_type='crawler',
        query=body,
    ) as scan_res:
        res_source, count = await format_search(scan_res, limit, offset)
    text = {
        'total_hits': count,
        'count': len(res_source),
        'results': res_source,
    }
    return json_response(text)
async def getAllCache(self, classStr: str, phoneStr: str, dataType: FLAG,
                      monthRange: List[str]) -> List[EsCacheModel]:
    """Fetch all cache entries matching the given criteria.

    classStr: class name
    phoneStr: phone number
    dataType: cache data type
    monthRange: list of dates
    Returns: list(EsCacheModel)
    """
    result = []
    monthRange = [f"month:{month}" for month in monthRange]
    query = 'apiClass:{} AND phoneNo:{} AND dataType:{} AND ({})'.format(
        classStr, phoneStr, dataType.value, ' OR '.join(monthRange))
    async with Elasticsearch([{
        'host': config[ELK.ELK_HOST.value],
        'port': config[ELK.ELK_PORT.value],
        'timeout': 360000,
    }]) as es:
        r = await es.search(index=self.esindex, doc_type=self.estype, q=query)
        r = ExObject(r)
        for item in r["?hits"]["?hits"]:
            _ec = EsCacheModel()
            _ec.apiClass = item["?_source"]["?apiClass"].ToString()
            _ec.month = item["?_source"]["?month"].ToString()
            _ec.phoneNo = item["?_source"]["?phoneNo"].ToString()
            _ec.dataType = item["?_source"]["?dataType"].ToString()
            _ec.page = item["?_source"]["?page"].ToOriginal()
            _ec.maxPages = item["?_source"]["?maxPages"].ToOriginal()
            _ec.updateTime = item["?_source"]["?updateTime"].ToOriginal()
            _ec.items = item["?_source"]["?items"].ToOriginal()
            result.append(_ec)
    return result
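# Hedged usage sketch for getAllCache above: fetch the cached pages for two
# months and print them in order. The class name, phone number, and
# FLAG.CALL_RECORD member are illustrative assumptions.
async def demo_get_all_cache(cache):
    models = await cache.getAllCache(
        "CallRecordApi", "13800000000", FLAG.CALL_RECORD,
        ["202001", "202002"])
    for m in sorted(models, key=lambda m: (m.month, m.page)):
        print(m.month, "page", m.page, "of", m.maxPages)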
async def aioes_request(request):
    es = Elasticsearch(data['elasticsearch']['hosts'])
    query = request.rel_url.query
    if 'q' in query:
        q = query['q']
    else:
        return []
    try:
        limit = int(query['limit']) if 'limit' in query else 10
    except ValueError:
        limit = 10
    try:
        offset = int(query['offset']) if 'offset' in query else 0
    except ValueError:
        offset = 0
    res = await es.search(index='site.docs.python.org', body={
        'query': {'match': {'content': q}},
        'size': limit,
        'from': offset,
    })
    urls = [r['_source']['url'] for r in res['hits']['hits']]
    await es.close()
    return web.Response(text=json.dumps({
        'request': q,
        'limit': limit,
        'offset': offset,
        'urls': urls,
    }))
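# Hedged wiring sketch for aioes_request above: registering it as an aiohttp
# route. The application object and the '/search' path are assumptions.
app = web.Application()
app.router.add_get('/search', aioes_request)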
async def main(self):
    async with Elasticsearch([{'host': 'localhost', 'port': 9200}]) as es:
        await self.initialize_index(es)
        await self.links.put(self.start_url)
        async with aiohttp.ClientSession() as session:
            async with asyncpool.AsyncPool(
                    self.loop, num_workers=10, name="CrawlerPool",
                    logger=logging.getLogger("CrawlerPool"),
                    worker_co=self.worker) as pool:
                link = await self.links.get()
                await pool.push(link, es, session)
                while True:
                    if not self.links.empty():
                        link = await self.links.get()
                    else:
                        await asyncio.sleep(0.2)
                        if self.links.empty():
                            break
                        link = await self.links.get()
                    await asyncio.sleep(self.sleep_time)
                    await pool.push(link=link, es=es, session=session)
async def test_get_apps_stats_with_data(self):
    """
    Seed an Elasticsearch instance with some data and compute the
    aggregated CPU and RAM usage.
    """
    app_stats_datapoints = get_fixture(
        "agents/ead07ffb-5a61-42c9-9386-21b680597e6c-S0/app_stats.json")
    app = MesosApp(id="infra/asgard/api")
    await self._load_app_stats_into_storage(
        self.INDEX_NAME, self.utc_now, app_stats_datapoints)
    backend = MarathonAppsBackend()
    user = User(**USER_WITH_MULTIPLE_ACCOUNTS_DICT)
    account = Account(**ACCOUNT_DEV_DICT)
    async with Elasticsearch([settings.STATS_API_URL]) as es:
        raw = await es.search(index=self.INDEX_NAME)
        cpu_pcts = [hit["_source"]["cpu_pct"] for hit in raw["hits"]["hits"]]
        mem_pcts = [hit["_source"]["mem_pct"] for hit in raw["hits"]["hits"]]
        self.assertEqual(5, len(cpu_pcts))
        self.assertEqual(5, len(mem_pcts))
        app_stats = await backend.get_app_stats(app, user, account)
        self.assertEqual(
            AppStats(cpu_pct="0.25", ram_pct="15.05", cpu_thr_pct="1.00"),
            app_stats,
        )
async def test_send_get_body_as_get(es_server, auto_close, loop):
    cl = auto_close(Elasticsearch(
        [{'host': es_server['host'], 'port': es_server['port']}],
        http_auth=es_server['auth'],
        loop=loop))
    await cl.create('test', 'type', '1', {'val': '1'})
    await cl.create('test', 'type', '2', {'val': '2'})
    ret = await cl.mget(
        {"docs": [{"_id": "1"}, {"_id": "2"}]},
        index='test', doc_type='type')
    assert ret == {'docs': [
        {'_id': '1', '_index': 'test', '_source': {'val': '1'},
         '_type': 'type', '_version': 1, 'found': True},
        {'_id': '2', '_index': 'test', '_source': {'val': '2'},
         '_type': 'type', '_version': 1, 'found': True},
    ]}
async def test_send_get_body_as_source_none_params(es_server, auto_close, loop):
    cl = auto_close(Elasticsearch(
        [{'host': es_server['host'], 'port': es_server['port']}],
        send_get_body_as='source',
        http_auth=es_server['auth'],
        loop=loop))
    await cl.create('test', 'type', '1', {'val': '1'})
    await cl.create('test', 'type', '2', {'val': '2'})
    ret = await cl.transport.perform_request(
        'GET', 'test/type/_mget',
        body={"docs": [{"_id": "1"}, {"_id": "2"}]})
    assert ret == {'docs': [
        {'_id': '1', '_index': 'test', '_source': {'val': '1'},
         '_type': 'type', '_version': 1, 'found': True},
        {'_id': '2', '_index': 'test', '_source': {'val': '2'},
         '_type': 'type', '_version': 1, 'found': True},
    ]}
async def search(self, request):
    try:
        data = await request.json()
        vk_id = data['vk_id']
        token = data['app_token']
        access_token = data['access_token']
        search_query = str(data['search'])
    except Exception:
        response = jsoner(status=400)
        return web.json_response(response)
    correct = await self.auth_.authorize(
        user_id=vk_id, access_token=access_token, sign=token)
    if correct.decode() == 'False':
        response = jsoner(status=403)
        return web.json_response(response)
    data = []
    try:
        async with Elasticsearch(elastic_settings) as es:
            res = await es.search(
                index='obi', doc_type='product',
                body={"query": {"terms": {"name": [search_query]}}})
            if len(res['hits']['hits']) == 0:
                res = await es.search(
                    index='obi', doc_type='product',
                    body={"query": {"match": {"name": 'name2 name3'}}})
            for prod in res['hits']['hits']:
                dic = {"product_id": prod['_id']}
                dic.update(prod['_source'])
                data.append(dic)
    except Exception:
        return web.json_response({'status': '404', 'text': 'NOT FOUND'})
    response = jsoner(status=200, searchlist=data)
    return web.json_response(response)
async def es_range(self, index, tp, *keys, call=None, **query):
    async with Elasticsearch([self.host]) as es:
        async with Scan(
            es,
            index=index,
            doc_type=tp,
            query=query,
        ) as scan:
            res = []
            count = (await es.count(index=index))['count']
            progressbar = tqdm(desc="scan all elasticsearch", total=count)
            ic = 0
            async for doc in scan:
                ic += 1
                if ic % 1000 == 0:
                    # advance by the number of documents processed since the
                    # last tick (the original advanced by count/1000, which
                    # only matched the total for counts near one million)
                    progressbar.update(1000)
                if call:
                    call(doc)
                else:
                    dd = {}
                    for k in keys:
                        # a key like 'a:b' drills into doc['a']['b']
                        v = doc
                        for kk in k.split(':'):
                            v = v.get(kk)
                            if not v:
                                break
                        dd[k] = v
                    res.append(dd)
            progressbar.close()
            return res
async def cleanup_es(es_host, prefix=""): conn = Elasticsearch(**app_settings["elasticsearch"]["connection_settings"]) for alias in (await conn.cat.aliases()).splitlines(): name, index = alias.split()[:2] if name[0] == "." or index[0] == ".": # ignore indexes that start with . continue if name.startswith(prefix): try: await conn.indices.delete_alias(index, name) await conn.indices.delete(index) except ( elasticsearch.exceptions.AuthorizationException, elasticsearch.exceptions.NotFoundError, ): pass for index in (await conn.cat.indices()).splitlines(): _, _, index_name = index.split()[:3] if index_name[0] == ".": # ignore indexes that start with . continue if index_name.startswith(prefix): try: await conn.indices.delete(index_name) except elasticsearch.exceptions.AuthorizationException: pass
def test_elastic_default_loop(auto_close, loop):
    asyncio.set_event_loop(loop)
    es = Elasticsearch()
    auto_close(es)
    assert es.loop is loop
async def connect_elasticsearch():
    _es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if await _es.ping():
        print('Yay Connect')
    else:
        print('Awww it could not connect!')
    return _es
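# A minimal sketch of driving connect_elasticsearch() from synchronous code.
# asyncio.run() is standard library and es.info() is a real client call, but
# the wrapper itself is illustrative, not part of the original code.
async def demo_connect():
    es = await connect_elasticsearch()
    try:
        info = await es.info()  # basic cluster metadata
        print(info.get('cluster_name'))
    finally:
        await es.close()        # async clients must be closed explicitly

if __name__ == '__main__':
    asyncio.run(demo_connect())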
def es(loop):
    es = Elasticsearch(loop=loop)
    delete_template = es.transport.perform_request(
        'DELETE', '/_template/*',
    )
    delete_all = es.transport.perform_request(
        'DELETE', '/_all',
    )
    coros = [delete_template, delete_all]
    coro = asyncio.gather(*coros, loop=loop)
    loop.run_until_complete(coro)
    try:
        yield es
    finally:
        loop.run_until_complete(es.close())
async def main(self, number):
    tasks = []
    q = asyncio.Queue()
    q.put_nowait(self.host)
    async with aiohttp.ClientSession() as client:
        for i in range(number):
            _es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
            task = asyncio.create_task(self.get_links(q, client, _es))
            tasks.append(task)
        await asyncio.gather(*tasks)
async def search(q, limit=None, offset=0):
    async with Elasticsearch() as es:
        body = {"query": {"match": {"body": q}}}
        scan = await es.search(size=limit, from_=offset, index="_all",
                               doc_type="crawler_links", body=body)
        docs = scan["hits"]["hits"]
        return [doc["_source"]["link"] for doc in docs]
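# Hedged usage sketch for search() above as an aiohttp handler; the handler
# name, query parameters, and response shape are assumptions.
async def search_handler(request):
    q = request.query.get('q', '')
    limit = int(request.query.get('limit', 10))
    links = await search(q, limit=limit)
    return web.json_response({'q': q, 'links': links})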
async def test_bulk_raise_exception(loop):
    asyncio.set_event_loop(loop)
    es = Elasticsearch()
    datas = [{'_op_type': 'delete',
              '_index': 'test_aioes',
              '_type': 'type_3',
              '_id': "999"}]
    with pytest.raises(TransportError):
        success, fails = await bulk(es, datas, stats_only=True)
async def search(conn, body, dtm):
    # Author's original note: "speaking honestly, I have no idea how I can
    # search documents".
    try:
        es = Elasticsearch()
        # NOTE: es.get() requires a document id; passing dtm here is an
        # assumption made only to keep the snippet runnable.
        res = await es.get(index="test-index", doc_type='tweet', id=dtm)
        text = str(res['_source']).replace('\n', '')
        return json.dumps(body)
    except Exception:
        return json.dumps('error')
async def myfun(self, quantity):
    tasks = []
    q = asyncio.Queue()
    q.put_nowait('https://docs.python.org/')
    async with aiohttp.ClientSession() as session:
        es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        for _ in range(quantity):
            task = asyncio.create_task(self.crawling(q, session, es))
            tasks.append(task)
        await asyncio.gather(*tasks)
def __init__(self, max_rps):
    self.max_tasks = max_rps
    self.q_url = asyncio.Queue()
    self.q_text = asyncio.Queue()
    self.q_rps = asyncio.Queue()
    self.seen_urls = set()
    self.sem = asyncio.Semaphore(max_rps)
    self.session = None
    self.es = Elasticsearch()
async def test_mark_live_not_dead(auto_close, es_server, loop):
    es = auto_close(Elasticsearch(
        hosts=[{'host': es_server['host'], 'port': es_server['port']},
               {'host': 'unknown_host', 'port': 9200}],
        http_auth=es_server['auth'],
        loop=loop))
    conn = await es.transport.get_connection()
    pool = es.transport.connection_pool
    pool.mark_live(conn)
    assert conn not in pool.dead_count
async def cleanup_es(es_host, prefix=''):
    conn = Elasticsearch(hosts=[es_host])
    for alias in (await conn.cat.aliases()).splitlines():
        name, index = alias.split()[:2]
        if name.startswith(prefix):
            await conn.indices.delete_alias(index, name)
            await conn.indices.delete(index)
    for index in (await conn.cat.indices()).splitlines():
        _, _, index_name = index.split()[:3]
        if index_name.startswith(prefix):
            await conn.indices.delete(index_name)
async def myfun(self, quantity, domain, glub):
    tasks = []
    q = asyncio.Queue()
    q.put_nowait(domain)
    async with aiohttp.ClientSession() as session:
        es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        for _ in range(quantity):
            task = asyncio.create_task(self.crawling(q, session, es, glub))
            tasks.append(task)
        await asyncio.gather(*tasks)
    await self.waitress()
async def make_record_to_es(self, link, soup):
    cleaned_text = soup.get_text()
    doc = {"link": link, "body": cleaned_text}
    async with Elasticsearch(ignore=409) as es:
        try:
            await es.create(
                index="crawler_links",
                doc_type="crawler_links",
                id=link,
                body=doc,
            )
            # the async with block closes the client; no explicit close needed
            self.LINKS_COUNTER += 1
        except elastic_exceptions.ConflictError:
            pass
async def clear_index(self):
    name = self.name
    index = self.status()[1].split("|")[0]
    async with Elasticsearch(self.host.split(",")) as es:
        ss = self.load_session(name)
        pwd = self['passwd']
        if pwd:
            e = getpass("passwd :")
            if e != pwd:
                logging.info(
                    "wrong password; refusing to delete all data in "
                    "index: %s" % index)
                return
        return await es.indices.delete(index)
async def test_mark_dead_removed_connection(auto_close, es_server, loop):
    es = auto_close(Elasticsearch(
        hosts=[{'host': es_server['host'], 'port': es_server['port']},
               {'host': 'unknown_host', 'port': 9200}],
        http_auth=es_server['auth'],
        loop=loop))
    conn = await es.transport.get_connection()
    pool = es.transport.connection_pool
    pool.mark_dead(conn)
    assert conn in pool.dead_count
    # second call should succeed
    pool.mark_dead(conn)
    assert conn in pool.dead_count
async def save_to_es(id, hand, data, loop):
    host = hand.get('es_host', 'localhost:9200')
    index = hand.get('es_index', 'es-main')
    doc_type = hand.get('es_type', 'es-doc')
    filter = hand.get('es_filter')
    type = hand.get('type')
    if type == 'json':
        data = json.loads(data)
    if filter:
        if type == 'json':
            filter_d = json.loads(filter)
            for k in filter_d:
                vv = filter_d[k]
                if isinstance(vv, list):
                    if data.get(k) in vv:
                        logging.info(colored(
                            "Filter: %s from data: {}".format(data) % id,
                            'yellow', attrs=['bold']))
                        return
                elif data.get(k) == vv:
                    logging.info(colored(
                        "Filter: %s from data: {}".format(data) % id,
                        'yellow', attrs=['bold']))
                    return
        else:
            if re.search(filter.encode('utf-8'), data):
                logging.info(colored(
                    "Filter: %s from data: {}".format(data[:100]) % id,
                    'yellow', attrs=['bold']))
                return
            try:
                data = json.loads(data)
            except json.JSONDecodeError:
                pass
    async with Elasticsearch(host.split(",")) as es:
        return await es.create(index, doc_type, id, data)
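# Hedged example of a `hand` configuration for save_to_es above: a JSON-typed
# record with a list filter, so documents whose "level" is "debug" or "trace"
# are skipped by the list branch. All values are illustrative assumptions.
demo_hand = {
    'es_host': 'localhost:9200',
    'es_index': 'es-main',
    'es_type': 'es-doc',
    'type': 'json',
    'es_filter': json.dumps({'level': ['debug', 'trace']}),
}
# await save_to_es('doc-1', demo_hand, '{"level": "info", "msg": "kept"}', loop)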
async def just_bulk(self, datas):
    async with Elasticsearch(self.host.split(",")) as es:
        d = []
        with tqdm(total=len(datas)) as pbar:
            for i, v in enumerate(datas):
                if i > 0 and i % 1024 == 0:
                    await es.bulk(d)
                    pbar.update(1024)
                    # start a fresh batch with the current item
                    if isinstance(v, list):
                        d = v
                    else:
                        d = [v]
                else:
                    if isinstance(v, list):
                        d += v
                    else:
                        d.append(v)
            if d:
                # flush the final partial batch
                await es.bulk(d)
                pbar.update(len(d))
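# Hedged sketch of the payload shape just_bulk passes to es.bulk(): raw
# bulk-API action/source pairs, since batches are forwarded unmodified.
# Index, type, and field values are illustrative assumptions.
demo_datas = [
    {"index": {"_index": "es-main", "_type": "es-doc", "_id": "1"}},
    {"field": "value one"},
    {"index": {"_index": "es-main", "_type": "es-doc", "_id": "2"}},
    {"field": "value two"},
]
# await crawler.just_bulk(demo_datas)  # `crawler` is a hypothetical instance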