async def test_scan_equal_chunks_for_loop(es, es_clean, populate):
    for n, scroll_size in [
        (0, 1),  # no results
        (6, 6),  # 1 scroll
        (6, 8),  # 1 scroll
        (6, 3),  # 2 scrolls
        (6, 4),  # 2 scrolls
        (6, 2),  # 3 scrolls
        (6, 1),  # 6 scrolls
    ]:
        es_clean()

        index = 'test_aioes'
        doc_type = 'type_1'
        body = {'foo': 1}
        await populate(index, doc_type, n, body)

        ids = set()

        async with Scan(
            es,
            index=index,
            doc_type=doc_type,
            size=scroll_size,
        ) as scan:
            async for doc in scan:
                ids.add(doc['_id'])

            # check number of unique doc ids
            assert len(ids) == n == scan.total
async def test_scan_exception_on_failed_shards(es, populate, mocker):
    index = 'test_aioes'
    doc_type = 'type_2'
    scroll_size = 3
    n = 10

    body = {'foo': 1}
    await populate(index, doc_type, n, body)

    mocker.spy(logger, 'warning')

    i = 0
    async with Scan(
        es,
        index=index,
        doc_type=doc_type,
        size=scroll_size,
    ) as scan:
        with pytest.raises(ScanError) as cm:
            async for doc in scan:  # noqa
                if i == 3:
                    # simulate a shard failure once, after the first scroll
                    scan._failed_shards = 1
                    scan._total_shards = 5
                i += 1

    assert (str(cm.value) ==
            'Scroll request has failed on 1 shards out of 5.')
    assert i == 6

    logger.warning.assert_called_once_with(
        'Scroll request has failed on %d shards out of %d.', 1, 5)
async def test_scan_simple(es, populate):
    index = 'test_aioes'
    doc_type = 'type_2'
    scroll_size = 3
    n = 10

    body = {'foo': 1}
    await populate(index, doc_type, n, body)

    ids = set()

    async with Scan(
        es,
        index=index,
        doc_type=doc_type,
        size=scroll_size,
    ) as scan:
        assert isinstance(scan.scroll_id, str)
        assert scan.total == 10

        async for doc in scan:
            ids.add(doc['_id'])
            assert doc == {'_id': mock.ANY,
                           '_index': 'test_aioes',
                           '_score': None,
                           '_source': {'foo': 1},
                           '_type': 'type_2',
                           'sort': mock.ANY}

    assert ids == {str(i) for i in range(10)}
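# The tests above call a ``populate`` fixture that is not shown in this
# section. Below is a minimal sketch of what it could look like, inferred
# from the assertions (string ids '0'..'n-1', documents visible to the
# scan); the fixture body is an assumption, not the upstream implementation.
@pytest.fixture
def populate(es):
    async def _populate(index, doc_type, n, body):
        for i in range(n):
            # test_scan_simple expects exactly the ids str(0)..str(n - 1)
            await es.index(index=index, doc_type=doc_type,
                           id=str(i), body=body)
        # refresh so the freshly indexed documents are searchable
        await es.indices.refresh(index=index)
    return _populate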
async def search(request):
    es = Elasticsearch()
    q = request.query.get('q')
    try:
        limit = int(request.query.get('limit', 0))
        offset = int(request.query.get('offset', 0))
    except ValueError:
        # non-numeric limit/offset
        return json_response({'response': 'wrong query'})

    body = {}
    if q:
        body['query'] = {'match': {'text': q}}

    async with Scan(
        es,
        index=index_name,
        doc_type='crawler',
        query=body,
    ) as scan_res:
        res_source, count = await format_search(scan_res, limit, offset)

    text = {
        'total_hits': count,
        'count': len(res_source),
        'results': res_source,
    }
    return json_response(text)
async def test_scan_warning_on_failed_shards(es, populate, mocker):
    index = 'test_aioes'
    doc_type = 'type_2'
    scroll_size = 3
    n = 10

    body = {'foo': 1}
    await populate(index, doc_type, n, body)

    mocker.spy(logger, 'warning')

    async with Scan(
        es,
        index=index,
        doc_type=doc_type,
        size=scroll_size,
        raise_on_error=False,
    ) as scan:
        i = 0
        async for doc in scan:  # noqa
            if i == 3:
                # simulate a shard failure once, after the first scroll
                scan._failed_shards = 1
                scan._total_shards = 5
            i += 1

    logger.warning.assert_called_once_with(
        'Scroll request has failed on %d shards out of %d.', 1, 5)
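# Both failed-shard tests patch the private ``_failed_shards`` /
# ``_total_shards`` counters and expect either a ScanError (the default,
# raise_on_error=True) or a single logged warning. A minimal sketch of the
# check they exercise follows; the message and attribute names come from the
# tests, while the method name ``_check_shards`` and the ``_raise_on_error``
# flag are assumptions.
def _check_shards(self):
    # called once per scroll response, before the counters are refreshed
    # from the next response
    if self._failed_shards:
        message = 'Scroll request has failed on %d shards out of %d.'
        if self._raise_on_error:
            raise ScanError(message % (self._failed_shards,
                                       self._total_shards))
        logger.warning(message, self._failed_shards, self._total_shards)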
async def es_range(self, index, tp, *keys, call=None, **query):
    async with Elasticsearch([self.host]) as es:
        async with Scan(
            es,
            index=index,
            doc_type=tp,
            query=query,
        ) as scan:
            res = []
            count = await es.count(index=index)
            count = count['count']
            progressbar = tqdm(desc="scan all elasticsearch", total=count)
            ic = 0
            async for doc in scan:
                ic += 1
                if ic % 1000 == 0:
                    # advance by the 1000 documents processed since the
                    # last update
                    progressbar.update(1000)
                if call:
                    call(doc)
                else:
                    dd = {}
                    for k in keys:
                        # 'a:b:c' addresses the nested field doc['a']['b']['c']
                        km = k.split(':')
                        v = doc
                        for kk in km:
                            v = v.get(kk)
                            if not v:
                                break
                        dd[k] = v
                    res.append(dd)
            progressbar.close()
            return res
async def search(request):
    logger.info(request.query)
    try:
        schema = SearchViewSchema()
        r = schema.load({**request.query})
        q, limit, offset = r['q'], r.get('limit', 100), r.get('offset', 0)
    except Exception as e:
        r = {'status': 'bad_request', 'reason': str(e)}
        logger.error(r)
        return await json_response(r)

    body = {'query': {'match': {'text': q}}}
    all_documents = [
        '{}://{}'.format('https' if i.https else 'http', i.domain)
        async for i in await CrawlerStats.objects.all()
    ]
    # strip characters that are not allowed in index names
    index_names_docs = [
        ''.join([
            i for i in ii
            if i not in ('[', '"', '*', '\\\\', '\\', '<', '|',
                         ',', '>', '/', '?', ':')
        ])
        for ii in all_documents
    ]
    response_data = {
        'total_hits': 0,
        'count': 0,
        'documents_in_list': [],
        'results': []
    }
    for index_name in index_names_docs:
        async with Scan(
            es,
            index=index_name,
            doc_type='crawler',
            query=body,
        ) as scan_res:
            res_source = [{
                'id': i['_id'],
                **i['_source']
            } async for i in scan_res]
            response_data['total_hits'] += len(res_source)
            response_data['results'].extend(res_source)

    count = len(response_data['results'])
    if limit:
        response_data['results'] = response_data['results'][
            offset:min(limit + offset, count)]
    else:
        response_data['results'] = response_data['results'][offset:]
    response_data['documents_in_list'] = list(
        set([await get_domain(i['url']) for i in response_data['results']]))
    response_data['count'] = len(response_data['results'])
    r = {'status': 'ok', 'data': response_data}
    logger.info(r)
    return await json_response(r)
async def test_scan_no_mask_index(es):
    index = 'undefined-*'
    scroll_size = 3

    async with Scan(
        es,
        index=index,
        size=scroll_size,
    ) as scan:
        assert scan.scroll_id is None
        assert scan.total['value'] == 0
        cnt = 0
        async for doc in scan:  # noqa
            cnt += 1
        assert cnt == 0
async def test_scan_no_index(es):
    index = 'undefined'
    doc_type = 'any'
    scroll_size = 3

    async with Scan(
        es,
        index=index,
        doc_type=doc_type,
        size=scroll_size,
    ) as scan:
        assert scan.scroll_id is None
        assert scan.total == 0
        cnt = 0
        async for doc in scan:  # noqa
            cnt += 1
        assert cnt == 0
async def test_scan_no_scroll(es, loop, populate):
    index = 'test_aioes'
    n = 10
    scroll_size = 1
    body = {'foo': 1}
    await populate(index, n, body)

    async with Scan(
        es,
        size=scroll_size,
    ) as scan:
        # the same happens after search context expiration
        await scan._do_clear_scroll()

        with pytest.raises(NotFoundError):
            async for doc in scan:
                doc
def test_scan_scroll_id_without_context_manager(es):
    scan = Scan(es)
    with pytest.raises(RuntimeError):
        scan.scroll_id
async def test_scan_async_for_without_context_manager(es):
    scan = Scan(es)
    with pytest.raises(RuntimeError):
        async for doc in scan:
            doc
def test_scan_total_without_context_manager(es):
    scan = Scan(es)
    with pytest.raises(RuntimeError):
        scan.total
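# The three tests above pin down the guard that makes ``Scan`` unusable
# outside of ``async with``: ``scroll_id``, ``total`` and iteration must all
# raise RuntimeError until ``__aenter__`` has issued the initial search.
# A minimal sketch of that guard, assuming a private sentinel; ``__aenter__``,
# ``__anext__`` and the actual search calls are omitted.
class Scan:
    _not_started = object()

    def __init__(self, es, **kwargs):
        self._es = es
        self._scroll_id = self._not_started
        self._total = self._not_started

    def _ensure_started(self):
        # the sentinel is distinct from None: after __aenter__ the scroll
        # id may legitimately be None (see test_scan_no_index above)
        if self._scroll_id is self._not_started:
            raise RuntimeError(
                'Scan must be used as an async context manager')

    @property
    def scroll_id(self):
        self._ensure_started()
        return self._scroll_id

    @property
    def total(self):
        self._ensure_started()
        return self._total

    def __aiter__(self):
        self._ensure_started()
        return self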