Example #1
def _query_client(nr_docs_query):
    global QUERY_TIMES
    logger.info(f'starting query thread. KEEP_RUNNING = {KEEP_RUNNING}')
    prev_len_matches = 0
    docs = list(
        _get_documents(nr=nr_docs_query, index_start=0, emb_size=EMB_SIZE))
    Client.check_input(docs)
    query_docs = [doc.dict() for doc in docs]
    while KEEP_RUNNING:
        try:
            logger.info(f'querying...')
            r = _send_rest_request(
                REST_PORT_QUERY,
                'search',
                'post',
                query_docs,
                timeout=8,
            )
            # the number of matches should never decrease while docs are being added
            len_matches = prev_len_matches
            for doc in r['search']['docs']:
                len_matches = len(doc.get('matches', []))
                assert len_matches >= prev_len_matches
            logger.info(f'got {len_matches} matches')
            if len_matches != prev_len_matches:
                # only count queries after a change in index size
                QUERY_TIMES += 1
            prev_len_matches = len_matches
            time.sleep(3)
        except (ConnectionError, ReadTimeoutError) as e:
            logger.error(f'querying failed: {e}. trying again...')
            logger.error(traceback.format_exc())
        except Exception as e:  # NewConnectionError or anything else is fatal
            logger.error(f'error in query thread: {e!r}')
            raise e
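The helper _send_rest_request used above is not shown in the example; below is a minimal sketch of what it might look like, built on requests. The URL scheme, the default timeout and the {'data': ...} payload envelope are assumptions, not confirmed by the snippet.

import requests

def _send_rest_request(port, endpoint, method, data, timeout=10):
    # hypothetical stand-in for the helper used above: send the docs to the
    # Flow's REST gateway and return the decoded JSON body
    url = f'http://localhost:{port}/{endpoint}'
    response = getattr(requests, method)(url, json={'data': data}, timeout=timeout)
    response.raise_for_status()
    return response.json()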
Example #2
def wrapper(args, docs, id, function, time_end, req_size):
    client = Client(args)
    print(f'Process {id}: Running function {function.__name__} with {len(docs)} docs...')
    while True:
        client.check_input(docs)
        function(client, docs, req_size)
        if time.time() >= time_end:
            print(f'Process {id}: end reached')
            # close Process
            return
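A sketch of how a wrapper like this is typically driven, assuming it is the target of a multiprocessing.Process; client_args, docs and index_fn below are placeholders for the client settings, the document list and the benchmarked function.

import time
from multiprocessing import Process

# run the benchmark function in two worker processes for 60 seconds;
# each wrapper() exits on its own once time_end is reached
time_end = time.time() + 60
processes = [
    Process(target=wrapper, args=(client_args, docs, i, index_fn, time_end, 100))
    for i in range(2)
]
for p in processes:
    p.start()
for p in processes:
    p.join()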
Example #3
def _index_client(nr_docs_index):
    global INDEX_TIMES
    logger.info(f'starting index thread. KEEP_RUNNING = {KEEP_RUNNING}')
    while KEEP_RUNNING:
        docs = list(
            _get_documents(
                nr=nr_docs_index,
                index_start=INDEX_TIMES * nr_docs_index,
                emb_size=EMB_SIZE,
            ))
        Client.check_input(docs)
        logger.info(f'indexing {len(docs)} docs...')
        _send_rest_request(REST_PORT_DBMS, 'index', 'post',
                           [doc.dict() for doc in docs])
        INDEX_TIMES += 1
        time.sleep(7)
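Examples #1 and #3 are meant to run concurrently against the same Flow; here is a minimal sketch of wiring the two loops together via the shared KEEP_RUNNING flag. The thread arguments and timings are illustrative only.

import threading
import time

# index and query at the same time: the query thread should observe the
# match count growing while the index thread keeps adding documents
index_thread = threading.Thread(target=_index_client, kwargs={'nr_docs_index': 10})
query_thread = threading.Thread(target=_query_client, kwargs={'nr_docs_query': 10})
index_thread.start()
query_thread.start()

time.sleep(120)
KEEP_RUNNING = False  # both loops re-check this flag and exit cleanly
index_thread.join()
query_thread.join()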
Example #4
def wrapper(args, docs_gen_func: Callable[[int], Generator], id,
            function: ClientFunction, time_start: int, time_end: int,
            req_size: int, dataset: str, nr_docs: int):
    client = Client(args)
    total_docs = 0
    while True:
        # add counter for docs and log to file {id}
        print(
            f'Process {id}: Running function {function.__name__} with {nr_docs} docs via {docs_gen_func.__name__}...'
        )
        client.check_input(docs_gen_func(nr_docs))
        function(client, docs_gen_func, req_size, dataset, nr_docs)
        total_docs += nr_docs
        done_time = time.time()
        if done_time >= time_end:
            print(f'Process {id}: end reached')
            fname = f'{FILE_PREFIX}-{time_end}-{function.__name__}-{id}.txt'
            print(f'process {id}: logging stats to {fname}')
            with open(fname, 'w') as f:
                f.write(f'{total_docs}\n')
            return
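Since each worker process writes its own count file, a hedged sketch of how the per-process totals could be aggregated after all workers exit; the glob pattern mirrors the fname built above, and FILE_PREFIX is assumed to be the same module-level constant.

import glob

# sum the per-process document counts written by wrapper() at shutdown
total = 0
for stats_file in glob.glob(f'{FILE_PREFIX}-*.txt'):
    with open(stats_file) as f:
        total += int(f.readline().strip())
print(f'all processes combined sent {total} docs')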