def _query_client(nr_docs_query):
    """Repeatedly query the REST search endpoint and track result growth.

    Runs until the module-level KEEP_RUNNING flag is cleared. Each
    successful query asserts that the number of matches never shrinks,
    and bumps the global QUERY_TIMES counter whenever the match count
    changes (i.e. only after the index grew).

    :param nr_docs_query: number of documents to build the query set from
    """
    global QUERY_TIMES
    logger.info(f'starting query thread. KEEP_RUNNING = {KEEP_RUNNING}')
    prev_len_matches = 0
    # build the query set once; the same payload is reused for every request
    docs = list(
        _get_documents(nr=nr_docs_query, index_start=0, emb_size=EMB_SIZE))
    Client.check_input(docs)
    query_docs = [doc.dict() for doc in docs]
    while KEEP_RUNNING:
        try:
            logger.info('querying...')
            r = _send_rest_request(
                REST_PORT_QUERY,
                'search',
                'post',
                query_docs,
                timeout=8,
            )
            for doc in r['search']['docs']:
                len_matches = len(doc.get('matches'))
                # the index only grows, so matches must never decrease
                assert len_matches >= prev_len_matches
                logger.info(f'got {len_matches} matches')
                if len_matches != prev_len_matches:
                    # only count queries after a change in index size
                    QUERY_TIMES += 1
                prev_len_matches = len_matches
            time.sleep(3)
        except (ConnectionError, ReadTimeoutError) as e:
            # transient network failure: log and retry on the next iteration
            logger.error(f'querying failed: {e}. trying again...')
            logger.error(traceback.format_exc())
        except Exception as e:
            # Anything else (incl. NewConnectionError) is fatal for this
            # thread. The original tuple (NewConnectionError, Exception) was
            # redundant — Exception already covers it. Bare `raise`
            # re-raises with the original traceback intact.
            logger.error(f'error in query thread: {e!r}')
            raise
def wrapper(args, docs, id, function, time_end, req_size):
    """Drive `function` against a fresh client until `time_end` passes.

    Validates `docs` and invokes `function` once per iteration; returns
    (ending the process) as soon as the wall clock reaches `time_end`.
    """
    client = Client(args)
    print(f'Process {id}: Running function {function.__name__} with {len(docs)} docs...')
    finished = False
    while not finished:
        client.check_input(docs)
        function(client, docs, req_size)
        finished = time.time() >= time_end
    print(f'Process {id}: end reached')
    # close Process
    return
def _index_client(nr_docs_index):
    """Continuously push fresh document batches to the DBMS index endpoint.

    Loops while the module-level KEEP_RUNNING flag is set; each round
    generates a new batch whose ids continue from the previous one (via
    the global INDEX_TIMES counter), posts it, then sleeps before the
    next round.

    :param nr_docs_index: number of documents per indexing batch
    """
    global INDEX_TIMES
    logger.info(f'starting index thread. KEEP_RUNNING = {KEEP_RUNNING}')
    while KEEP_RUNNING:
        batch = list(
            _get_documents(
                nr=nr_docs_index,
                index_start=INDEX_TIMES * nr_docs_index,
                emb_size=EMB_SIZE,
            ))
        Client.check_input(batch)
        logger.info(f'indexing {len(batch)} docs...')
        payload = [d.dict() for d in batch]
        _send_rest_request(REST_PORT_DBMS, 'index', 'post', payload)
        INDEX_TIMES += 1
        time.sleep(7)
def wrapper(args, docs_gen_func: Callable[[int], Generator], id,
            function: ClientFunction, time_start: int, time_end: int,
            req_size: int, dataset: str, nr_docs: int):
    """Run `function` in a loop until `time_end`, then log the doc total.

    Each iteration validates a fresh generator of `nr_docs` documents and
    hands it to `function`. Once the wall clock passes `time_end`, the
    cumulative document count is written to a per-process stats file and
    the function returns (ending the process).

    NOTE(review): `time_start` is accepted but never read here — kept for
    interface compatibility with callers.
    """
    client = Client(args)
    total_docs = 0
    while True:
        print(
            f'Process {id}: Running function {function.__name__} with {nr_docs} docs via {docs_gen_func.__name__}...'
        )
        client.check_input(docs_gen_func(nr_docs))
        function(client, docs_gen_func, req_size, dataset, nr_docs)
        total_docs += nr_docs
        if time.time() >= time_end:
            print(f'Process {id}: end reached')
            # os.path.join with a single argument was a no-op; build the
            # filename directly from the f-string.
            fname = f'{FILE_PREFIX}-{time_end}-{function.__name__}-{id}.txt'
            print(f'process {id}: logging stats to {fname}')
            with open(fname, 'w') as f:
                # str() inside an f-string was redundant
                f.write(f'{total_docs}\n')
            return