def index(client: Client, docs_gen_func: Callable[[int], Generator], req_size: int, dataset: str, nr_docs: int):
    """Send ``nr_docs`` generated documents to ``client.index``.

    :param client: client whose ``index`` method receives the documents
    :param docs_gen_func: called with ``nr_docs``; its result is passed on
        as the documents to index
    :param req_size: forwarded to ``client.index`` as ``request_size``
    :param dataset: dataset name; it is only compared with ``'text'`` and
        currently has no effect on what is sent
    :param nr_docs: argument handed to ``docs_gen_func``
    """
    # Callback handed to the client as ``on_done``.
    callback = index_done

    if dataset == 'text':
        # TODO maybe specific validation?
        pass

    client.index(
        docs_gen_func(nr_docs),
        request_size=req_size,
        on_done=callback,
    )
def test_custom_dockerfile():
    """Index five documents through a ``DummyRedisIndexer`` Flow and search one back.

    The Flow adds a single executor (``uses='DummyRedisIndexer'``, loaded from
    ``redis_executor.py``) on host ``localhost:8000`` and uploads the
    ``good_ws_custom_dockerfile`` directory with it. After indexing documents
    whose texts are ``'0'`` .. ``'4'``, a search for text ``'3'`` must return a
    first match with text ``'3'`` and an embedding of shape ``(2, 3)``.
    """
    workspace_dir = os.path.join(
        cur_dir, '../../daemon/unit/models/good_ws_custom_dockerfile'
    )
    flow = Flow(port_expose=exposed_port).add(
        uses='DummyRedisIndexer',
        py_modules='redis_executor.py',
        upload_files=[workspace_dir],
        host='localhost:8000',
    )

    with flow:
        client = Client(port=exposed_port)

        # Kept as a generator so documents are created lazily, as consumed.
        documents = (
            Document(text=f'{i}', embedding=np.random.rand(2, 3))
            for i in range(5)
        )
        client.index(inputs=documents)

        results = client.search(
            inputs=[Document(text='3')], return_results=True
        )
        best_match = results[0].docs[0].matches[0]
        assert best_match.text == '3'
        assert best_match.embedding.shape == (2, 3)
def test_scale_with_concurrent_client(remote_flow_with_runtime: Flow, pod_params, protocol):
    """Scale the ``executor`` pod while several client processes are indexing.

    ``NUM_CONCURRENT_CLIENTS`` processes each index
    ``NUM_DOCS_SENT_BY_CLIENTS`` documents and report the text of every
    returned document through a shared queue. While they run, the Flow is
    scaled to the replica count taken from ``pod_params``. Afterwards the
    test checks that every document text was reported back and that five
    single-document requests each return exactly one document.

    :param remote_flow_with_runtime: Flow used as a context manager
    :param pod_params: three-element sequence; the second element is the
        replica count to scale to
    :param protocol: protocol assigned to the Flow and used by every client
    """

    def peer_client(port, protocol, peer_hash, queue):
        # Runs in a child process: index a batch and report the texts back.
        responses = Client(protocol=protocol, port=port).index(
            [Document(text=peer_hash) for _ in range(NUM_DOCS_SENT_BY_CLIENTS)],
            request_size=5,
            return_results=True,
        )
        for response in responses:
            for doc in response.docs:
                # our proto objects are not fit to be sent by queues,
                # so only the plain text is put on the queue
                queue.put(doc.text)

    num_replicas, scale_to, _ = pod_params
    queue = multiprocessing.Queue()

    with remote_flow_with_runtime as flow:
        flow.protocol = protocol
        port_expose = flow.port_expose

        # Launch every client process before triggering the scale operation.
        workers = []
        for peer_id in range(NUM_CONCURRENT_CLIENTS):
            worker = multiprocessing.Process(
                target=peer_client,
                args=(port_expose, protocol, str(peer_id), queue),
            )
            worker.start()
            workers.append(worker)

        flow.scale(pod_name='executor', replicas=scale_to)

        # NOTE(review): the multiprocessing docs advise draining a Queue
        # before joining the processes that feed it; the queue is drained only
        # after these joins -- confirm this cannot block.
        for worker in workers:
            worker.join()

        client = Client(protocol=protocol, port=port_expose)
        rv = client.index(
            [Document() for _ in range(5)],
            request_size=1,
            return_results=True,
        )

    expected_total = NUM_CONCURRENT_CLIENTS * NUM_DOCS_SENT_BY_CLIENTS
    assert queue.qsize() == expected_total

    # Drain the queue to count what the client processes reported.
    received = []
    while not queue.empty():
        received.append(queue.get())
    assert len(received) == expected_total

    assert len(rv) == 5

    # Collect the replica id tagged on each single-document response.
    replicas = []
    for response in rv:
        assert len(response.docs) == 1
        replicas.append(response.docs[0].tags['replica_id'])