def _test_error(flow_kwargs, add_kwargs, error_port=None):
    """Build a one-pod Flow and assert that indexing raises ConnectionError.

    When ``error_port`` is given, its string form must appear in the
    exception message.
    """
    flow = Flow(**flow_kwargs).add(**add_kwargs)
    with flow:
        # the error is expected while sending the (empty) index request,
        # not while starting the flow itself
        with pytest.raises(ConnectionError) as err_info:
            flow.index(inputs=[])
        if error_port:
            assert str(error_port) in err_info.value.args[0]
def test_flow_on_error_callback(restful):
    """An executor error must fire on_error and on_always but never on_done."""

    class DummyCrafterNotImplemented(Executor):
        @requests
        def craft(self, text, *args, **kwargs):
            raise NotImplementedError

    f = Flow(restful=restful).add(uses='!DummyCrafterNotImplemented')
    hit = []

    def record_done(*args):
        hit.append('done')

    def record_error(*args):
        hit.append('error')

    def record_always(*args):
        hit.append('always')

    with f:
        f.index(
            DocumentArray.from_ndarray(np.random.random([10, 10])),
            on_done=record_done,
            on_error=record_error,
            on_always=record_always,
        )
    # only the error and always hooks should have fired
    assert hit == ['error', 'always']
    hit.clear()
def test_sparse_pipeline(mocker, docs_to_index):
    """Index then search sparse docs; matches keep order and sparse embeddings."""

    def validate(response):
        assert len(response.data.docs) == 10
        for doc in response.data.docs:
            for idx, match in enumerate(doc.matches):
                assert match.id == docs_to_index[idx].id
                assert isinstance(match.embedding, sparse.coo_matrix)

    f = Flow().add(uses=DummyCSRSparseIndexEncoder)
    done_mock = mocker.Mock()
    fail_mock = mocker.Mock()
    with f:
        f.index(inputs=docs_to_index, on_done=done_mock)
        f.search(
            inputs=docs_to_index[0],
            parameters={'doc': docs_to_index[0], 'top_k': 1},
            on_done=done_mock,
            on_error=fail_mock,
        )
    done_mock.assert_called_once()
    validate_callback(done_mock, validate)
    fail_mock.assert_not_called()
def test_shards():
    """Index 1000 docs into a 3-way parallel docpb pod, then clean up."""
    flow = Flow().add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=3,
    )
    with flow:
        flow.index(input_fn=random_docs(1000), random_doc_id=False)
    # the flow must be re-enterable after indexing
    with flow:
        pass
    rm_files(['test-docshard-tmp'])
def test_roundrobin():
    """Round-robin scheduling across 10 parallel SlowWorker peas."""
    flow = Flow(runtime='process').add(
        name='sw',
        uses='SlowWorker',
        parallel=10,
        scheduling=SchedulerType.ROUND_ROBIN,
    )
    with flow:
        flow.index(inputs=random_docs(100), request_size=10)
def test_flow_needs_all(protocol):
    """needs_all must connect a new pod to every currently floating pod."""
    f = Flow(protocol=protocol).add(name='p1', needs='gateway').needs_all(name='r1')
    assert f._pod_nodes['r1'].needs == {'p1'}

    # three floating pods, r1 joins two of them, r2 gathers the rest
    f = Flow(protocol=protocol)
    for pod in ('p1', 'p2', 'p3'):
        f = f.add(name=pod, needs='gateway')
    f = f.needs(needs=['p1', 'p2'], name='r1').needs_all(name='r2')
    assert f._pod_nodes['r2'].needs == {'p3', 'r1'}
    with f:
        f.index(from_ndarray(np.random.random([10, 10])))

    # same topology with an extra pod hanging off the gather pod
    f = Flow(protocol=protocol)
    for pod in ('p1', 'p2', 'p3'):
        f = f.add(name=pod, needs='gateway')
    f = (
        f.needs(needs=['p1', 'p2'], name='r1')
        .needs_all(name='r2')
        .add(name='p4', needs='r2')
    )
    assert f._pod_nodes['r2'].needs == {'p3', 'r1'}
    assert f._pod_nodes['p4'].needs == {'r2'}
    with f:
        f.index(from_ndarray(np.random.random([10, 10])))
def test_on_error_callback(mocker, restful):
    """A failing pod must route the request to on_error, never to on_done."""

    # on_done callback that would blow up if it were (wrongly) invoked
    def validate1():
        raise NotImplementedError

    class MyExecutor(Executor):
        @requests
        def foo(self, **kwargs):
            raise NotImplementedError

    def validate2(x, *args):
        routes = x.routes
        assert len(routes) == 4  # gateway, r1, r3, gateway
        failed = [r for r in routes if r.status.code == jina_pb2.StatusProto.ERROR]
        assert failed[0].pod == 'r3/ZEDRuntime'

    f = Flow(restful=restful).add(name='r1').add(name='r3', uses=MyExecutor)
    on_error_mock = mocker.Mock()
    with f:
        f.index(
            [Document(text='abbcs'), Document(text='efgh')],
            on_done=validate1,
            on_error=on_error_mock,
        )
    validate_callback(on_error_mock, validate2)
def test_container_override_params(docker_image_built, tmpdir, mocker):
    """uses_with / uses_metas must override the containerized executor config."""

    def validate_response(resp):
        assert len(resp.docs) > 0
        for doc in resp.docs:
            assert doc.tags['greetings'] == 'overriden greetings'

    done_mock = mocker.Mock()
    abc_path = os.path.join(tmpdir, 'abc')
    flow = Flow().add(
        name=random_name(),
        uses=f'docker://{img_name}',
        volumes=abc_path + ':' + '/mapped/here/abc',
        uses_with={'greetings': 'overriden greetings'},
        uses_metas={
            'name': 'ext-mwu-encoder',
            'workspace': '/mapped/here/abc',
        },
    )
    with flow:
        flow.index(random_docs(10), on_done=done_mock)
    # the overridden workspace must have been written to on the host side
    assert os.path.exists(
        os.path.join(abc_path, 'ext-mwu-encoder', '0', 'ext-mwu-encoder.bin')
    )
    validate_callback(done_mock, validate_response)
def test_shards(docpb_workspace):
    """Index 1000 docs through a 3-way parallel docpb pod and reopen the flow."""
    flow = Flow().add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=3,
    )
    with flow:
        flow.index(inputs=random_docs(1000), random_doc_id=False)
    # a second start/stop cycle must also succeed
    with flow:
        pass
def test_flow_no_container(restful):
    """A plain YAML (non-container) executor loads and indexes docs."""
    flow = Flow(restful=restful).add(
        name='dummyEncoder',
        uses=os.path.join(cur_dir, '../mwu-encoder/mwu_encoder.yml'),
    )
    with flow:
        flow.index(inputs=random_docs(10))
def test_seg(mocker, restful, uses):
    """Segmenting executors given via ``uses`` must produce a valid response."""
    done_mock = mocker.Mock()
    flow = Flow(restful=restful).add(uses=uses)
    with flow:
        flow.index(inputs=random_docs(10, chunks_per_doc=0), on_done=done_mock)
    done_mock.assert_called_once()
    validate_callback(done_mock, validate)
def test_flow_before_after(protocol):
    """uses_before and uses_after each add an extra pod to the deployment."""
    f = Flow(protocol=protocol).add(uses_before=MyExec, uses_after=MyExec, name='p1')
    with f:
        f.index(random_docs(10))
        # p1 = head + tail + before + after; gateway adds one more pod
        assert f.num_deployments == 2
        assert f._deployment_nodes['p1'].num_pods == 4
        assert f.num_pods == 5
def test_mime_type(protocol):
    """Indexing .py files should yield the text/x-python mime type."""

    def validate_mime_type(req):
        for d in req.data.docs:
            assert d.mime_type == 'text/x-python'

    flow = Flow(protocol=protocol).add(uses=MimeExec)
    with flow:
        flow.index(from_files('*.py'), validate_mime_type)
def test_flow_after(protocol):
    """uses_after adds exactly one extra pea to the pod."""
    f = Flow(protocol=protocol).add(uses_after=MyExec, name='p1')
    with f:
        f.index(random_docs(10))
        assert f.num_pods == 2
        assert f._pod_nodes['p1'].num_peas == 2
        assert f.num_peas == 3
def index_documents():
    """Index the toy PDF blog posts through the flow defined in flows/index.yml."""
    pdf_files = [
        'toy_data/blog1.pdf',
        'toy_data/blog2.pdf',
        'toy_data/blog3.pdf',
    ]
    flow = Flow().load_config('flows/index.yml')
    with flow:
        flow.index(
            input_fn=index_generator(data_path=pdf_files),
            read_mode='r',
            request_size=1,
        )
def test_flow(protocol):
    """A single-pod flow reports the expected deployment/pod counts."""
    f = Flow(protocol=protocol).add(name='p1')
    with f:
        f.index(random_docs(10))
        assert f.num_deployments == 2
        assert f._deployment_nodes['p1'].num_pods == 2
        assert f.num_pods == 3
def test_refactor_num_part_2(protocol):
    """A sharded pod must index both with explicit ALL polling and the default."""
    for extra in ({'polling': 'ALL'}, {}):
        f = Flow(protocol=protocol).add(
            name='r1', needs='gateway', shards=3, **extra
        )
        with f:
            f.index([Document(text='abbcs'), Document(text='efgh')])
def test_l_r_l_simple(parallels, mocker):
    """local -> remote (parallel) -> local chain delivers responses."""
    done_mock = mocker.Mock()
    flow = Flow().add().add(host=CLOUD_HOST, parallel=parallels).add()
    with flow:
        docs = (Document(text='hello') for _ in range(NUM_DOCS))
        flow.index(inputs=docs, on_done=done_mock)
    done_mock.assert_called()
def test_l_r_l_simple(replicas, mocker):
    """local -> remote (replicated) -> local chain delivers responses."""
    done_mock = mocker.Mock()
    flow = Flow().add().add(host=CLOUD_HOST, replicas=replicas).add()
    with flow:
        docs = (Document(text='hello') for _ in range(NUM_DOCS))
        flow.index(inputs=docs, on_done=done_mock, show_progress=True)
    done_mock.assert_called()
def test_remote_executor_gpu(mocker, gpus):
    """Requesting GPUs on a remote executor must not break Pea creation."""
    # This test wouldn't be able to use gpus on remote, as they're not
    # available on CI. But it shouldn't fail the Pea creation.
    done_mock = mocker.Mock()
    flow = Flow().add(host=CLOUD_HOST, gpus=gpus)
    with flow:
        docs = (Document(text='hello') for _ in range(NUM_DOCS))
        flow.index(inputs=docs, on_done=done_mock)
    done_mock.assert_called()
def test_flow_default_before_after_is_ignored(protocol):
    """Default executors as uses_before/uses_after must not add extra pods."""
    f = Flow(protocol=protocol).add(
        uses_after=__default_executor__,
        uses_before=__default_executor__,
        name='p1',
    )
    with f:
        f.index(random_docs(10))
        # counts are identical to a bare .add(name='p1')
        assert f.num_deployments == 2
        assert f._deployment_nodes['p1'].num_pods == 2
        assert f.num_pods == 3
def test_send_complex_document(docs, executor_class, mocker):
    """Round-tripping complex documents must keep their tags intact."""

    def check_tags(resp):
        first = resp.docs[0]
        assert first.tags == EXPECTED_TAGS

    done_mock = mocker.Mock()
    flow = Flow().add(uses=executor_class)
    with flow:
        flow.index(inputs=docs, on_done=done_mock)
    validate_callback(done_mock, check_tags)
def test_refactor_num_part_2(restful):
    """A parallel pod must index both with explicit ALL polling and the default."""
    for extra in ({'polling': 'ALL'}, {}):
        f = Flow(restful=restful).add(
            name='r1', needs='gateway', parallel=3, **extra
        )
        with f:
            f.index([Document(text='abbcs'), Document(text='efgh')])
def test_flow_after(protocol):
    """uses_after with a no-op executor adds exactly one extra pea."""

    class MyExec(Executor):
        @requests
        def foo(self, **kwargs):
            pass

    f = Flow(protocol=protocol).add(uses_after=MyExec, name='p1')
    with f:
        f.index(random_docs(10))
        assert f.num_pods == 2
        assert f._pod_nodes['p1'].num_peas == 2
        assert f.num_peas == 3
def test_refactor_num_part_2(restful):
    """_logforward with parallel=3 must index with and without ALL polling."""
    for extra in ({'polling': 'ALL'}, {}):
        f = Flow(restful=restful).add(
            name='r1', uses='_logforward', needs='gateway', parallel=3, **extra
        )
        with f:
            f.index(['abbcs', 'efgh'])
def test_mime_type(protocol):
    """URI-to-buffer conversion must keep the text/x-python mime type."""

    class MyExec(Executor):
        @req
        def foo(self, docs: 'DocumentArray', **kwargs):
            for d in docs:
                d.convert_uri_to_buffer()

    def validate_mime_type(request):
        for d in request.data.docs:
            assert d.mime_type == 'text/x-python'

    flow = Flow(protocol=protocol).add(uses=MyExec)
    with flow:
        flow.index(from_files('*.py'), validate_mime_type)
def test_compression(compress_algo):
    """Every compression algorithm must round-trip index requests."""
    flow = (
        Flow(compress=str(compress_algo))
        .add()
        .add(name='DummyEncoder', shards=2)
        .add()
    )
    with flow:
        results = flow.index(random_docs(10), return_results=True)
        assert len(results) > 0
def test_two_flow_with_shared_external_deployment(
    external_deployment, external_deployment_args, input_docs, num_shards
):
    """Two flows sharing one external deployment both reduce shard results."""
    external_deployment.head_args.disable_reduce = True
    with external_deployment:
        external_args = vars(external_deployment_args)
        # strip args that the .add() call below sets explicitly
        for key in ('name', 'external', 'deployment_role'):
            del external_args[key]
        flow1 = Flow().add(**external_args, name='external_fake', external=True)
        flow2 = (
            Flow()
            .add(name='foo')
            .add(
                **external_args,
                name='external_fake',
                external=True,
                needs=['gateway', 'foo'],
            )
        )
        with flow1, flow2:
            results = flow1.index(inputs=input_docs)
            # Reducing applied after shards, expect only 50 docs
            validate_response(results, 50)
            # Reducing applied after sharding and the needs
            results = flow2.index(inputs=input_docs)
            validate_response(results, 50)
def test_two_flow_with_shared_external_executor(
    external_executor,
    external_executor_args,
    input_docs,
):
    """Two flows may share one external executor; the second one doubles docs."""
    with external_executor:
        external_args = vars(external_executor_args)
        # the .add() call below supplies its own name
        del external_args['name']
        flow1 = Flow().add(**external_args, name='external_fake', external=True)
        flow2 = (
            Flow()
            .add(name='foo')
            .add(
                **external_args,
                name='external_fake',
                external=True,
                needs=['gateway', 'foo'],
            )
        )
        with flow1, flow2:
            results = flow1.index(inputs=input_docs, return_results=True)
            validate_response(results[0])
            results = flow2.index(inputs=input_docs, return_results=True)
            validate_response(results[0], 50 * 2)
def test_two_flow_with_shared_external_pod(
    external_pod, external_pod_args, input_docs, num_replicas, num_parallel
):
    """Two flows may share one external pod; doc counts scale with parallelism."""
    with external_pod:
        external_args = vars(external_pod_args)
        # strip args that the .add() calls below set explicitly
        for key in ('name', 'external', 'pod_role', 'dynamic_routing'):
            del external_args[key]
        flow1 = Flow().add(**external_args, name='external_fake', external=True)
        flow2 = (
            Flow()
            .add(name='foo')
            .add(
                **external_args,
                name='external_fake',
                external=True,
                needs=['gateway', 'foo'],
            )
        )
        with flow1, flow2:
            results = flow1.index(inputs=input_docs, return_results=True)
            validate_response(results[0], 50 * num_parallel)
            # flow2 merges gateway + foo, doubling the expected count
            results = flow2.index(inputs=input_docs, return_results=True)
            validate_response(results[0], 50 * num_parallel * 2)