def test_pathURI2Buffer():
    """Round-trip documents through the URI2Buffer / Buffer2URI crafter pods."""
    flow = Flow().add(uses='- !URI2Buffer {}').add(uses='- !Buffer2URI {}')
    with flow:
        flow.index(input_fn=input_fn3)
def test_port_configuration(replicas_and_parallel):
    """Build a flow of pods with varying replicas/parallel counts and verify
    that the in/out ports of consecutive pods, of replicas, and of shard peas
    are wired together consistently.
    """

    def extract_pod_args(pod):
        # A non-replicated pod keeps head/tail/peas inside peas_args;
        # a replicated pod exposes them as dedicated attributes.
        if 'replicas' not in pod.args or int(pod.args.replicas) == 1:
            head_args = pod.peas_args['head']
            tail_args = pod.peas_args['tail']
            middle_args = pod.peas_args['peas']
        else:
            head_args = pod.head_args
            tail_args = pod.tail_args
            middle_args = pod.replicas_args
        return pod, head_args, tail_args, middle_args

    def get_outer_ports(pod, head_args, tail_args, middle_args):
        # Return the (port_in, port_out) pair visible to neighbouring pods.
        if 'replicas' not in pod.args or int(pod.args.replicas) == 1:
            if 'parallel' not in pod.args or int(pod.args.parallel) == 1:
                # single pea: no head/tail, the pea itself owns both ports
                assert tail_args is None
                assert head_args is None
                replica = middle_args[0]  # there is only one
                return replica.port_in, replica.port_out
            else:
                return pod.head_args.port_in, pod.tail_args.port_out
        else:
            assert pod.args.replicas == len(middle_args)
            return pod.head_args.port_in, pod.tail_args.port_out

    def validate_ports_pods(pods):
        # Each pod's outgoing port must equal the next pod's incoming port.
        for i in range(len(pods) - 1):
            _, port_out = get_outer_ports(*extract_pod_args(pods[i]))
            port_in_next, _ = get_outer_ports(*extract_pod_args(pods[i + 1]))
            assert port_out == port_in_next

    def validate_ports_replica(replica, replica_port_in, replica_port_out, parallel):
        # A replica's outer ports must match what its pod assigned to it.
        assert replica_port_in == replica.args.port_in
        assert replica.args.port_out == replica_port_out
        peas_args = replica.peas_args
        peas = peas_args['peas']
        assert len(peas) == parallel
        if parallel == 1:
            # single shard: no head/tail, the pea connects directly
            assert peas_args['head'] is None
            assert peas_args['tail'] is None
            assert peas[0].port_in == replica_port_in
            assert peas[0].port_out == replica_port_out
        else:
            # multiple shards: peas fan out between the shard head and tail
            shard_head = peas_args['head']
            shard_tail = peas_args['tail']
            assert replica.args.port_in == shard_head.port_in
            assert replica.args.port_out == shard_tail.port_out
            for pea in peas:
                assert shard_head.port_out == pea.port_in
                assert pea.port_out == shard_tail.port_in

    flow = Flow()
    for i, (replicas, parallel) in enumerate(replicas_and_parallel):
        flow.add(
            name=f'pod{i}',
            replicas=replicas,
            parallel=parallel,
            # info: needs to be set in this test since the test is asserting
            # pod args with pod tail args
            port_in=f'51{i}00',
            # outside this test, it doesn't have to be set
            port_out=f'51{i + 1}00',
            copy_flow=False,
        )

    with flow:
        pods = flow._pod_nodes

        # gateway -> pod0 -> ... -> podN -> gateway must be consistently wired
        validate_ports_pods(
            [pods['gateway']]
            + [pods[f'pod{i}'] for i in range(len(replicas_and_parallel))]
            + [pods['gateway']]
        )
        for pod_name, pod in pods.items():
            if pod_name == 'gateway':
                continue
            if pod.args.replicas == 1:
                if int(pod.args.parallel) == 1:
                    assert len(pod.peas_args['peas']) == 1
                else:
                    assert len(pod.peas_args) == 3
                replica_port_in = pod.args.port_in
                replica_port_out = pod.args.port_out
            else:
                replica_port_in = pod.head_args.port_out
                replica_port_out = pod.tail_args.port_in
                # head/tail peas bridge the pod's outer ports to the replicas
                assert pod.head_pea.args.port_in == pod.args.port_in
                assert pod.head_pea.args.port_out == replica_port_in
                assert pod.tail_pea.args.port_in == replica_port_out
                assert pod.tail_pea.args.port_out == pod.args.port_out
            if pod.args.replicas > 1:
                for replica in pod.replicas:
                    validate_ports_replica(
                        replica,
                        replica_port_in,
                        replica_port_out,
                        getattr(pod.args, 'parallel', 1),
                    )
        assert pod
def dryrun():
    """Load the index flow definition and perform a dry run against it."""
    flow = Flow().load_config("flows/index.yml")
    with flow:
        flow.dry_run()
def get_index_flow(yaml_file, num_shards):
    """Build an index Flow from *yaml_file*, sharded across *num_shards* shards."""
    executor_yaml = os.path.join(cur_dir, 'yaml', yaml_file)
    return Flow().add(uses=executor_yaml, shards=num_shards)
def test_dryrun(self):
    """Dry-run a single-pod flow built from the mwu encoder YAML."""
    flow = Flow().add(name='dummyEncoder', yaml_path='mwu-encoder/mwu_encoder.yml')
    with flow:
        flow.dry_run()
def test_flow_with_modalities(tmpdir, restful):
    """Index multimodal documents through a two-branch flow and verify each
    branch's vector file and key-value index on disk under *tmpdir*.
    """
    # workspace env var presumably consumed by the indexer YAMLs — verify there
    os.environ['JINA_TEST_FLOW_MULTIMODE_WORKSPACE'] = str(tmpdir)

    def input_function():
        # three docs, each text carrying a 'title' part and a 'body' part
        doc1 = jina_pb2.DocumentProto()
        doc1.text = 'title: this is mode1 from doc1, body: this is mode2 from doc1'
        doc1.id = '1'
        doc2 = jina_pb2.DocumentProto()
        doc2.text = 'title: this is mode1 from doc2, body: this is mode2 from doc2'
        doc2.id = '2'
        doc3 = jina_pb2.DocumentProto()
        doc3.text = 'title: this is mode1 from doc3, body: this is mode2 from doc3'
        doc3.id = '3'
        return [doc1, doc2, doc3]

    # two encoder/indexer branches fork after the segmenter and re-join:
    # encoder1 -> indexer1 and encoder2 -> indexer2
    flow = (
        Flow(restful=restful)
        .add(name='segmenter', uses='!MockSegmenter')
        .add(name='encoder1', uses=os.path.join(cur_dir, 'yaml/mockencoder-mode1.yml'))
        .add(
            name='indexer1',
            uses=os.path.join(cur_dir, 'yaml/numpy-indexer-1.yml'),
            needs=['encoder1'],
        )
        .add(
            name='encoder2',
            uses=os.path.join(cur_dir, 'yaml/mockencoder-mode2.yml'),
            needs=['segmenter'],
        )
        .add(name='indexer2', uses=os.path.join(cur_dir, 'yaml/numpy-indexer-2.yml'))
        .join(['indexer1', 'indexer2'])
    )

    with flow:
        flow.index(inputs=input_function)

    # branch 1 must have written all-zero 3-d vectors for the three docs
    with open(os.path.join(tmpdir, 'compound', 'vecidx1-0', 'vec1.gz'), 'rb') as fp:
        result = np.frombuffer(fp.read(), dtype='float').reshape([-1, 3])
        np.testing.assert_equal(
            result, np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
        )
    # branch 2 must have written all-one 3-d vectors
    with open(os.path.join(tmpdir, 'compound', 'vecidx2-0', 'vec2.gz'), 'rb') as fp:
        result = np.frombuffer(fp.read(), dtype='float').reshape([-1, 3])
        np.testing.assert_equal(
            result, np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])
        )

    # key-value index of branch 1: query one stored id, expect a mode1 chunk
    chunkIndexer1 = BinaryPbIndexer.load(
        os.path.join(tmpdir, 'compound', 'kvidx1-0', 'kvidx1.bin')
    )
    assert chunkIndexer1.size == 3
    d_id = list(chunkIndexer1.query_handler.header.keys())[0]

    query_doc = jina_pb2.DocumentProto()
    query_doc.ParseFromString(chunkIndexer1.query([d_id])[0])
    assert query_doc.text == 'title: this is mode1 from doc1'
    assert query_doc.modality == 'mode1'

    # key-value index of branch 2: expect the matching mode2 chunk
    chunkIndexer2 = BinaryPbIndexer.load(
        os.path.join(tmpdir, 'compound', 'kvidx2-0', 'kvidx2.bin')
    )
    assert chunkIndexer2.size == 3
    d_id = list(chunkIndexer2.query_handler.header.keys())[0]

    query_doc = jina_pb2.DocumentProto()
    query_doc.ParseFromString(chunkIndexer2.query([d_id])[0])
    assert query_doc.text == ' body: this is mode2 from doc1'
    assert query_doc.modality == 'mode2'

    # clean up the env var so other tests don't inherit the workspace
    del os.environ['JINA_TEST_FLOW_MULTIMODE_WORKSPACE']
def test_flow_arguments_priorities():
    """A pod-level port_expose must override the flow-level default."""
    flow = Flow(port_expose=12345).add(name='test', port_expose=23456)
    pod_cli_args = flow._pod_nodes['test'].cli_args
    assert '23456' in pod_cli_args
    assert '12345' not in pod_cli_args
def get_flow():
    """Load the query flow and switch its gateway to the REST interface."""
    flow = Flow().load_config(QUERY_FLOW_FILE_PATH)
    flow.use_rest_gateway()
    return flow
def test_lb():
    """Index through ten parallel SlowWorker peas to exercise load balancing."""
    flow = Flow(runtime='process').add(name='sw', uses='SlowWorker', parallel=10)
    with flow:
        flow.index(inputs=random_docs(100), request_size=10)
def test_any_file(mocker):
    """Indexing arbitrary files as data URIs must trigger the on_done callback."""
    on_done_mock = mocker.Mock()
    flow = Flow().add(uses='- !URI2DataURI | {base64: true}')
    with flow:
        flow.index(input_fn=input_fn2, on_done=on_done_mock)
    on_done_mock.assert_called()
def test_flow_with_parallel():
    """Index through a flow whose second pod runs three parallel peas."""
    flow = Flow().add(name='r1').add(name='r2', parallel=3)
    with flow:
        flow.index(random_docs(100))
def test_simple_route():
    """Smoke-test indexing through a single default pod."""
    flow = Flow().add()
    with flow:
        flow.index(inputs=random_docs(10))
def test_gateway_dataui():
    """Index plain text lines through a pass-through pod."""
    flow = Flow().add(uses='_pass')
    with flow:
        flow.index_lines(lines=['abc', '123', 'hello, world'])
def test_text2datauri():
    """Index text lines through the Text2URI crafter pod."""
    flow = Flow().add(uses='- !Text2URI {}')
    with flow:
        flow.index_lines(lines=['abc', '123', 'hello, world'])
def flow():
    """Fixture: a single default pod Flow with the REST API disabled."""
    return Flow(rest_api=False).add()
def test_flow():
    """A flow containing a remote SSH-managed pod must start and stop cleanly."""
    flow = (
        Flow()
        .add()
        .add(host='[email protected]', remote_manager=RemoteAccessType.SSH)
    )
    with flow:
        pass
def flow_with_rest_api_enabled():
    """Fixture: a single default pod Flow with the REST API enabled."""
    return Flow(rest_api=True).add()
def test_flow_with_one_container_pod(self):
    """Index through a flow whose single pod runs from a container image."""
    flow = Flow().add(name='dummyEncoder', uses=img_name)
    with flow:
        flow.index(input_fn=random_docs(10))
def test_flow_with_jump():
    """Build a flow with forks and joins and verify each pod's head/tail
    socket types, then round-trip the flow through YAML save/load.

    Fix: the temporary ``tmp.yml`` is now removed in a ``finally`` block so a
    failing assertion or a broken round-trip no longer leaks the file.
    """
    f = (
        Flow()
        .add(name='r1')
        .add(name='r2')
        .add(name='r3', needs='r1')
        .add(name='r4', needs='r2')
        .add(name='r5', needs='r3')
        .add(name='r6', needs='r4')
        .add(name='r8', needs='r6')
        .add(name='r9', needs='r5')
        .add(name='r10', needs=['r9', 'r8'])
    )
    with f:
        pass

    # expected (socket_in, socket_out) for every pod in the flow;
    # r1 publishes to two followers, hence PUB_BIND / SUB_CONNECT pairs
    expected_sockets = {
        'gateway': (SocketType.PULL_CONNECT, SocketType.PUSH_CONNECT),
        'r1': (SocketType.PULL_BIND, SocketType.PUB_BIND),
        'r2': (SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        'r3': (SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        'r4': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        'r5': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        'r6': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        'r8': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        'r9': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        'r10': (SocketType.PULL_BIND, SocketType.PUSH_BIND),
    }
    for pod_name, (socket_in, socket_out) in expected_sockets.items():
        node = f._pod_nodes[pod_name]
        assert node.head_args.socket_in == socket_in, pod_name
        assert node.tail_args.socket_out == socket_out, pod_name

    # every pod is single-pea: its only pea doubles as both head and tail
    for name, node in f._pod_nodes.items():
        assert node.peas_args['peas'][0] == node.head_args
        assert node.peas_args['peas'][0] == node.tail_args

    try:
        # the flow must survive a YAML round-trip and still start
        f.save_config('tmp.yml')
        Flow.load_config('tmp.yml')
        with Flow.load_config('tmp.yml') as fl:
            pass
    finally:
        rm_files(['tmp.yml'])  # always clean up, even if the round-trip fails
def test_client_ndjson(restful, mocker, func_name):
    """The *_ndjson client helpers must invoke on_done exactly once."""
    ndjson_path = os.path.join(cur_dir, 'docs.jsonlines')
    with Flow(restful=restful).add() as flow, open(ndjson_path) as fp:
        on_done = mocker.Mock()
        getattr(flow, f'{func_name}_ndjson')(fp, on_done=on_done)
        on_done.assert_called_once()
def test_flow_default_argument_passing():
    """A flow-level port_expose must propagate into the pod's CLI args."""
    flow = Flow(port_expose=12345).add(name='test')
    assert '12345' in flow._pod_nodes['test'].cli_args
def test_topk(config):
    """Index 100 docs, then search and validate the top-k results."""
    with Flow().load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    num_query_docs = int(os.environ['JINA_NDOCS'])
    with Flow().load_config('flow.yml') as search_flow:
        search_flow.search(
            input_fn=random_docs(num_query_docs),
            output_fn=validate_results,
        )
def test_flow_with_one_container_pod(docker_image_built):
    """Index through a flow whose single pod runs from the built container image."""
    flow = Flow().add(name='dummyEncoder1', uses=img_name)
    with flow:
        flow.index(input_fn=random_docs(10))
def get_flow():
    """Load the query flow and switch its gateway to the gRPC interface."""
    flow = Flow().load_config('flows/query.yml')
    flow.use_grpc_gateway()
    return flow
def test_flow_no_container(self):
    """Index raw protobuf bytes through a YAML-configured encoder pod."""
    flow = Flow().add(name='dummyEncoder', yaml_path='mwu-encoder/mwu_encoder.yml')
    with flow:
        flow.index(raw_bytes=random_docs(10), in_proto=True)
def plot(req):
    """Scatter-plot the index vectors together with the 1st-, 2nd- and
    3rd-order matches of the first query doc.

    NOTE(review): match ids appear to be 1-based indices into ``vecs``
    (hence ``id - 1``) — confirm against the indexer that produced them.
    """
    neighbour1 = vecs[[m.id - 1 for m in req.docs[0].matches], :]
    neighbour2 = vecs[[
        mm.id - 1 for m in req.docs[0].matches for mm in m.matches
    ]]
    neighbour3 = vecs[[
        mmm.id - 1
        for m in req.docs[0].matches
        for mm in m.matches
        for mmm in mm.matches
    ]]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # draw deepest (adjacency=3) matches first so closer matches layer on top
    ax.scatter(vecs[:, 0], vecs[:, 1], alpha=0.5)
    ax.scatter(neighbour3[:, 0], neighbour3[:, 1], color='yellow', alpha=0.5)
    ax.scatter(neighbour2[:, 0], neighbour2[:, 1], color='cyan', alpha=0.5)
    ax.scatter(neighbour1[:, 0], neighbour1[:, 1], color='green', alpha=0.5)
    # the query vector itself, highlighted in red
    ax.scatter(vecs[0][0], vecs[0][1], color='red', alpha=0.5)
    ax.set_aspect('equal', adjustable='box')
    ax.legend(
        ['index', 'adjacency=3', 'adjacency=2', 'adjacency=1', 'query'],
        title='high order matches',
    )
    plt.show()


# index all docs, then search with the first doc and plot its matches
f = Flow(callback_on_body=True).add(uses='test-adj.yml')
with f:
    f.index(index_docs)

with f:
    f.search([index_docs[0]], output_fn=plot)
def query_restful():
    """Serve the query flow over the REST gateway until interrupted."""
    flow = Flow().load_config("flows/query.yml")
    flow.use_rest_gateway()
    with flow:
        flow.block()
def test_client_csv(restful, mocker, func_name):
    """The CSV client helpers must invoke on_done exactly once."""
    csv_path = os.path.join(cur_dir, 'docs.csv')
    with Flow(restful=restful).add() as flow, open(csv_path) as fp:
        on_done = mocker.Mock()
        getattr(flow, func_name)(from_csv(fp), on_done=on_done)
        on_done.assert_called_once()
def search():
    """Load the query flow and serve it until interrupted."""
    flow = Flow().load_config('flow-query.yml')
    with flow:
        flow.block()
def test_any_file():
    """Index arbitrary files after converting them to base64 data URIs."""
    flow = Flow().add(uses='- !URI2DataURI | {base64: true}')
    with flow:
        flow.index(input_fn=input_fn2)