def test_video_cut_by_num(self):
    """Index ``self.video_bytes`` via the ``yml_path_3`` preprocessor; every doc must have 6 chunks."""
    service_args = set_preprocessor_parser().parse_args(
        ['--yaml_path', self.yml_path_3])
    # The client mirrors the service: its in-port is the service's out-port
    # and vice versa.
    client_args = _set_client_parser().parse_args([
        '--port_in', str(service_args.port_out),
        '--port_out', str(service_args.port_in),
    ])

    with PreprocessorService(service_args), ZmqClient(client_args) as client:
        for request in RequestGenerator.index(self.video_bytes):
            outgoing = gnes_pb2.Message()
            outgoing.request.index.CopyFrom(request.index)
            client.send_message(outgoing)

            reply = client.recv_message()
            for doc in reply.request.index.docs:
                self.assertEqual(len(doc.chunks), 6)
def test_preprocessor_service_echo(self):
    """Round-trip an index request and then a train request through a default preprocessor.

    The replies are only printed; nothing is asserted about their content.
    """
    service_args = set_preprocessor_service_parser().parse_args([])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(service_args.port_out),
        '--port_out', str(service_args.port_in),
    ])

    with PreprocessorService(service_args), ZmqClient(client_args) as client:
        # The same message object is reused for both round trips.
        message = gnes_pb2.Message()
        for request_kind in ('index', 'train'):
            request_body = getattr(message.request, request_kind)
            request_body.docs.extend([gnes_pb2.Document() for _ in range(5)])
            client.send_message(message)
            reply = client.recv_message()
            print(reply)
def setUp(self):
    """Resolve the block-router fixture paths and pre-parse router/client arguments."""
    here = os.path.dirname(__file__)
    self.rerank_router_yaml = os.path.join(here, '../', 'router/block/block_train.yml')
    self.python_code = os.path.join(here, '../', 'router/block/block.py')

    router_cli = [
        '--yaml_path', self.rerank_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
        '--py_path', self.python_code,
    ]
    self.args = set_router_parser().parse_args(router_cli)

    # Client ports are the router's ports swapped.
    client_cli = [
        '--port_in', str(self.args.port_out),
        '--port_out', str(self.args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ]
    self.c_args = _set_client_parser().parse_args(client_cli)
def test_rerank(self):
    """Send search responses with ``top_k = 5`` through the rerank router.

    With every line of ``self.test_str`` as a candidate the reply must hold
    5 results; with only the first three lines it must hold 3.
    """
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.rerank_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
        '--py_path', self.python_code,
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    def make_search_response(lines):
        # One scored document per text line, all with the same score.
        message = gnes_pb2.Message()
        message.response.search.ClearField('topk_results')
        for doc_id, text in enumerate(lines):
            result = message.response.search.topk_results.add()
            result.score.value = 0.1
            result.doc.doc_id = doc_id
            result.doc.raw_text = text
        message.envelope.num_part.extend([1])
        message.response.search.top_k = 5
        return message

    # c2 is opened alongside c1 but never used for sending or receiving.
    with RouterService(router_args), ZmqClient(client_args) as c1, ZmqClient(client_args) as c2:
        scenarios = ((self.test_str, 5), (self.test_str[:3], 3))
        for lines, expected_count in scenarios:
            c1.send_message(make_search_response(lines))
            reply = c1.recv_message()
            self.assertSequenceEqual(reply.envelope.num_part, [1])
            self.assertEqual(len(reply.response.search.topk_results), expected_count)
def test_map_router(self):
    """Send 5 docs through the batch router and expect replies of 2, 2 and 1 docs."""
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.batch_router_yaml,
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
    ])

    with RouterService(router_args), ZmqClient(client_args) as c1:
        request = gnes_pb2.Message()
        request.request.index.docs.extend([gnes_pb2.Document() for _ in range(5)])
        c1.send_message(request)

        # One request goes in; three replies are read back, in this order.
        for expected_docs in (2, 2, 1):
            reply = c1.recv_message()
            self.assertEqual(len(reply.request.index.docs), expected_docs)
def test_empty_service(self):
    """Index one image doc with a single blob chunk through an inline ``TestEncoder`` service."""
    inline_yaml = '!TestEncoder {gnes_config: {name: EncoderService, is_trained: true}}'
    service_args = set_encoder_parser().parse_args(['--yaml_path', inline_yaml])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(service_args.port_out),
        '--port_out', str(service_args.port_in),
    ])

    with ServiceManager(EncoderService, service_args), ZmqClient(client_args) as client:
        request = gnes_pb2.Message()
        doc = request.request.index.docs.add()
        doc.doc_type = gnes_pb2.Document.IMAGE
        chunk = doc.chunks.add()
        chunk.blob.CopyFrom(array2blob(self.test_numeric))

        client.send_message(request)
        reply = client.recv_message()

        self.assertEqual(len(reply.request.index.docs), 1)
        self.assertEqual(reply.response.index.status, gnes_pb2.Response.SUCCESS)
def test_avg_router(self):
    """Send three identical parts through the avg router and check embedding shapes.

    Runs once for a search query and once after adding three index docs to
    the same message. In both cases the merged reply must report
    ``num_part == [1]`` and every chunk embedding must have shape ``[5, 2]``.
    """
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.avg_router_yaml,
        '--socket_out', str(SocketType.PUSH_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.PULL_CONNECT),
    ])

    # 10 chunks in each doc, dimension of chunk embedding is (5, 2)
    chunk_count = 10
    doc_count = 3
    parts_sent = 3

    def random_embedding():
        return array2blob(np.random.random([5, 2]))

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()

        # --- search query ---
        for _ in range(chunk_count):
            query_chunk = msg.request.search.query.chunks.add()
            query_chunk.embedding.CopyFrom(random_embedding())
        msg.envelope.num_part.extend([1, 3])
        for _ in range(parts_sent):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        print(reply.envelope.routes)
        for chunk_idx in range(chunk_count):
            self.assertEqual(
                reply.request.search.query.chunks[chunk_idx].embedding.shape,
                [5, 2])

        # --- index docs, added to the same message ---
        for _ in range(doc_count):
            doc = msg.request.index.docs.add()
            for _ in range(chunk_count):
                doc_chunk = doc.chunks.add()
                doc_chunk.embedding.CopyFrom(random_embedding())
        for _ in range(parts_sent):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        for doc_idx in range(doc_count):
            for chunk_idx in range(chunk_count):
                self.assertEqual(
                    reply.request.index.docs[doc_idx].chunks[chunk_idx].embedding.shape,
                    [5, 2])
def test_publish_router(self):
    """Publish one message and check that both subscribers see ``num_part == [1, 2]``."""
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.publish_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    with RouterService(router_args), ZmqClient(client_args) as c1, ZmqClient(client_args) as c2:
        outgoing = gnes_pb2.Message()
        outgoing.request.index.docs.extend([gnes_pb2.Document() for _ in range(5)])
        outgoing.envelope.num_part.append(1)
        c1.send_message(outgoing)

        # Only c1 sends; both clients read a copy, c1 first.
        for subscriber in (c1, c2):
            received = subscriber.recv_message()
            self.assertSequenceEqual(received.envelope.num_part, [1, 2])
def test_video_preprocessor_service_realdata(self):
    """Index ``self.video_bytes`` through the ``yml_path`` preprocessor.

    Every returned document must have at least one chunk, and every chunk
    blob must decode to an array of shape ``(168, 192, 3)``.
    """
    args = set_preprocessor_parser().parse_args(
        ['--yaml_path', self.yml_path])
    # Client ports are the service's ports swapped.
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(self.video_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            for d in r.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                # Iterate the chunks directly instead of indexing with a
                # throwaway-named `_` counter over range(len(...)).
                for chunk in d.chunks:
                    shape = blob2array(chunk.blob).shape
                    self.assertEqual(shape, (168, 192, 3))
def test_video_decode_preprocessor(self):
    """Index every file under ``self.video_path`` through the ``yml_path`` preprocessor.

    Every returned document must have at least one chunk, and every chunk
    blob must decode to an array whose trailing dimensions are
    ``(299, 299, 3)``.
    """
    args = set_preprocessor_parser().parse_args(['--yaml_path', self.yml_path])
    # Client ports are the service's ports swapped.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in)])

    # Read each file inside a `with` so the handle is closed straight away
    # rather than left for the garbage collector.
    video_bytes = []
    for file_name in os.listdir(self.video_path):
        with open(os.path.join(self.video_path, file_name), 'rb') as video_file:
            video_bytes.append(video_file.read())

    with ServiceManager(PreprocessorService, args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(video_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            for d in r.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                for chunk in d.chunks:
                    shape = blob2array(chunk.blob).shape
                    self.assertEqual(shape[1:], (299, 299, 3))
def test_concat_router(self):
    """Send three identical parts through the concat router and check the merged shapes.

    A ``[5, 2]`` query embedding sent three times must come back as
    ``[5, 6]``; index docs of shape ``[5, 2 * j]`` must come back as
    ``[5, 6 * j]``.
    """
    router_args = set_router_service_parser().parse_args([
        '--yaml_path', self.concat_router_yaml,
        '--socket_out', str(SocketType.PUSH_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.PULL_CONNECT),
    ])
    parts_sent = 3

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()

        # --- search query ---
        query_blob = array2blob(np.random.random([5, 2]))
        msg.request.search.query.chunk_embeddings.CopyFrom(query_blob)
        msg.envelope.num_part = 3
        for _ in range(parts_sent):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertEqual(reply.envelope.num_part, 1)
        print(reply.envelope.routes)
        self.assertEqual(reply.request.search.query.chunk_embeddings.shape, [5, 6])

        # --- index docs with growing embedding width ---
        for width_factor in range(1, 4):
            doc = msg.request.index.docs.add()
            doc.chunk_embeddings.CopyFrom(
                array2blob(np.random.random([5, 2 * width_factor])))
        msg.envelope.num_part = 3
        for _ in range(parts_sent):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertEqual(reply.envelope.num_part, 1)
        for width_factor in range(1, 4):
            merged = reply.request.index.docs[width_factor - 1].chunk_embeddings
            self.assertEqual(merged.shape, [5, 6 * width_factor])
def test_unary_preprocessor_service_realdata(self):
    """Index every member of ``imgs/test.zip`` through the unary image preprocessor.

    Each reply must be routed via
    ``PreprocessorService:BaseUnaryPreprocessor``, and each document must
    hold exactly one chunk whose blob decodes to a 3-D array with a last
    dimension of 3.
    """
    args = set_preprocessor_service_parser().parse_args([
        '--yaml_path', self.unary_img_pre_yaml
    ])
    # Client ports are the service's ports swapped.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in)
    ])

    # Open the archive as a context manager so it is closed after reading;
    # ZipFile.read() returns a member's bytes without leaving a member
    # handle open.
    with zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) as all_zips:
        all_bytes = [all_zips.read(v) for v in all_zips.namelist()]

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(all_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            self.assertEqual(r.envelope.routes[0].service,
                             'PreprocessorService:BaseUnaryPreprocessor')
            for d in r.request.index.docs:
                self.assertEqual(len(d.chunks), 1)
                # Decode the blob once and reuse the shape for both checks.
                shape = blob2array(d.chunks[0].blob).shape
                self.assertEqual(len(shape), 3)
                self.assertEqual(shape[-1], 3)
def setUp(self):
    """Resolve the reranker fixtures, load the non-empty sonnet lines and pre-parse arguments."""
    here = os.path.dirname(__file__)
    self.rerank_router_yaml = os.path.join(here, 'yaml', 'test-reranker.yml')
    self.python_code = os.path.join(here, '../', 'router/rerank/rerank.py')

    # Keep every line that is non-empty after stripping whitespace.
    self.test_str = []
    with open(os.path.join(here, 'sonnets_small.txt')) as sonnets:
        stripped_lines = (raw.strip() for raw in sonnets)
        self.test_str.extend(text for text in stripped_lines if text)

    router_cli = [
        '--yaml_path', self.rerank_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
        '--py_path', self.python_code,
    ]
    self.args = set_router_parser().parse_args(router_cli)

    # Client ports are the router's ports swapped.
    client_cli = [
        '--port_in', str(self.args.port_out),
        '--port_out', str(self.args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ]
    self.c_args = _set_client_parser().parse_args(client_cli)
def test_empty_service(self):
    """Index one embedded chunk through an inline ``BaseChunkIndexer`` service and expect SUCCESS."""
    inline_yaml = '!BaseChunkIndexer {gnes_config: {name: IndexerService}}'
    service_args = set_indexer_parser().parse_args(['--yaml_path', inline_yaml])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(service_args.port_out),
        '--port_out', str(service_args.port_in),
    ])

    with ServiceManager(IndexerService, service_args), ZmqClient(client_args) as client:
        request = gnes_pb2.Message()
        doc = request.request.index.docs.add()
        chunk = doc.chunks.add()
        chunk.doc_id = 0
        chunk.embedding.CopyFrom(array2blob(self.test_numeric))
        chunk.offset = 0
        chunk.weight = 1.0

        client.send_message(request)
        reply = client.recv_message()
        self.assertEqual(reply.response.index.status, gnes_pb2.Response.SUCCESS)
def test_chunk_reduce_router(self):
    """Send two partial chunk-level results and check the reduced reply.

    The reply must contain three results with scores 0.6, 0.4 and 0.3,
    the first not lower than the last, and newline-joined
    ``score_explained`` strings.
    """
    router_args = set_router_service_parser().parse_args([
        '--yaml_path', self.chunk_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    # (score, score_explained, chunk doc_id) for each of the two parts.
    parts = (
        ((0.1, '1-c1', 1), (0.2, '1-c2', 2), (0.3, '1-c3', 1)),
        ((0.2, '2-c1', 1), (0.2, '2-c2', 2), (0.3, '2-c3', 3)),
    )
    expected_explained = ('1-c1\n1-c3\n2-c1\n', '1-c2\n2-c2\n', '2-c3\n')
    expected_scores = (0.6, 0.4, 0.3)

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()
        for part_no, rows in enumerate(parts):
            if part_no:
                # The message is reused, so drop the previous part's results.
                msg.response.search.ClearField('topk_results')
            for score, explained, doc_id in rows:
                hit = msg.response.search.topk_results.add()
                hit.score = score
                hit.score_explained = explained
                hit.chunk.doc_id = doc_id
            msg.envelope.num_part = 2
            c1.send_message(msg)

        reply = c1.recv_message()
        results = reply.response.search.topk_results
        self.assertEqual(reply.envelope.num_part, 1)
        self.assertEqual(len(reply.response.search.topk_results), 3)
        self.assertGreaterEqual(results[0].score, results[-1].score)
        print(reply.response.search.topk_results)
        for rank, explained in enumerate(expected_explained):
            self.assertEqual(results[rank].score_explained, explained)
        for rank, score in enumerate(expected_scores):
            self.assertAlmostEqual(results[rank].score, score)
def test_doc_combine_score_fn(self):
    """Reduce two partial chunk results, then score them with a ``DictIndexer``.

    The reduced results are passed to ``DictIndexer.query_and_score`` with
    a ``CoordDocScoreFn``. NOTE: the returned value is not asserted on.
    """
    from gnes.indexer.doc.dict import DictIndexer

    # Three docs with ids 1..3, each holding three equally weighted chunks.
    document_list = []
    document_id_list = []
    for doc_id in range(1, 4):
        doc = gnes_pb2.Document()
        for offset in range(1, 4):
            chunk = doc.chunks.add()
            chunk.doc_id = doc_id
            chunk.offset = offset
            chunk.weight = 1 / 3
        document_id_list.append(doc_id)
        document_list.append(doc)

    self.chunk_router_yaml = 'Chunk2DocTopkReducer'
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.chunk_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    def add_hits(message, rows):
        # Append one top-k entry per (score value, explanation, chunk doc_id).
        for value, explained, chunk_doc_id in rows:
            hit = message.response.search.topk_results.add()
            hit.score.value = value
            hit.score.explained = explained
            hit.chunk.doc_id = chunk_doc_id

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()
        add_hits(msg, ((0.1, '"1-c1"', 1), (0.2, '"1-c2"', 2), (0.3, '"1-c3"', 1)))
        msg.envelope.num_part.extend([1, 2])
        c1.send_message(msg)

        # Reuse the message for the second part with fresh results.
        msg.response.search.ClearField('topk_results')
        add_hits(msg, ((0.2, '"2-c1"', 1), (0.2, '"2-c2"', 2), (0.3, '"2-c3"', 3)))
        c1.send_message(msg)

        reply = c1.recv_message()
        doc_indexer = DictIndexer(score_fn=CoordDocScoreFn())
        doc_indexer.add(keys=document_id_list, docs=document_list)
        queried_result = doc_indexer.query_and_score(
            docs=reply.response.search.topk_results, top_k=2)
def test_chunk_reduce_router(self):
    """Send two partial chunk-level results and check the reduced reply.

    The reply must report ``num_part == [1]`` and contain three results
    with score values 0.6, 0.4 and 0.3, the first not lower than the
    last. Each result's JSON ``score.explained`` must list the expected
    entries under its ``'operand'`` key.
    """
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.chunk_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    def add_hits(message, rows):
        # Append one top-k entry per (score value, explanation, chunk doc_id).
        for value, explained, chunk_doc_id in rows:
            hit = message.response.search.topk_results.add()
            hit.score.value = value
            hit.score.explained = explained
            hit.chunk.doc_id = chunk_doc_id

    expected_operands = (['1-c1', '1-c3', '2-c1'], ['1-c2', '2-c2'], ['2-c3'])
    expected_values = (0.6, 0.4, 0.3)

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()
        add_hits(msg, ((0.1, '"1-c1"', 1), (0.2, '"1-c2"', 2), (0.3, '"1-c3"', 1)))
        msg.envelope.num_part.extend([1, 2])
        c1.send_message(msg)

        # Reuse the message for the second part with fresh results.
        msg.response.search.ClearField('topk_results')
        add_hits(msg, ((0.2, '"2-c1"', 1), (0.2, '"2-c2"', 2), (0.3, '"2-c3"', 3)))
        c1.send_message(msg)

        reply = c1.recv_message()
        results = reply.response.search.topk_results
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        self.assertEqual(len(reply.response.search.topk_results), 3)
        self.assertGreaterEqual(results[0].score.value, results[-1].score.value)
        print(reply.response.search.topk_results)
        for rank, operands in enumerate(expected_operands):
            self.assertEqual(
                json.loads(results[rank].score.explained)['operand'], operands)
        for rank, value in enumerate(expected_values):
            self.assertAlmostEqual(results[rank].score.value, value)
def test_doc_sum_reduce_router(self):
    """Send two partial doc-level results through the doc-sum router.

    The reduced reply must report ``num_part == [1]`` and hold three
    results, the first scoring no lower than the last.
    """
    router_args = set_router_parser().parse_args([
        '--yaml_path', self.doc_sum_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    client_args = _set_client_parser().parse_args([
        '--port_in', str(router_args.port_out),
        '--port_out', str(router_args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    def add_hits(message, rows):
        # Append one top-k entry per (score value, doc_id, raw_text, explanation).
        for value, doc_id, raw_text, explained in rows:
            hit = message.response.search.topk_results.add()
            hit.score.value = value
            hit.doc.doc_id = doc_id
            hit.doc.raw_text = raw_text
            hit.score.explained = explained

    first_part = (
        (0.4, 1, 'd3', '1-d3\n'),
        (0.3, 2, 'd2', '1-d2\n'),
        (0.2, 3, 'd1', '1-d3\n'),
    )
    second_part = (
        (0.5, 1, 'd2', '2-d2\n'),
        (0.2, 2, 'd1', '2-d1\n'),
        (0.1, 3, 'd3', '2-d3\n'),
    )

    with RouterService(router_args), ZmqClient(client_args) as c1:
        msg = gnes_pb2.Message()
        add_hits(msg, first_part)
        msg.envelope.num_part.extend([1, 2])
        c1.send_message(msg)

        # Reuse the message for the second part; top_k is only set here.
        msg.response.search.ClearField('topk_results')
        add_hits(msg, second_part)
        msg.response.search.top_k = 5
        c1.send_message(msg)

        reply = c1.recv_message()
        print(reply.response.search.topk_results)
        results = reply.response.search.topk_results
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        self.assertEqual(len(reply.response.search.topk_results), 3)
        self.assertGreaterEqual(results[0].score.value, results[-1].score.value)
def test_multimap_multireduce(self):
    """Push one message through a two-level publish / reduce tree of routers.

    The reply read back by the client must report ``num_part == [1]``.
    """
    # p1 ->
    #      p21 ->
    #              r311
    #              r312
    #                   -> r41
    #                          -> r5
    #      p22 ->
    #              r321
    #              r322
    #                   -> r42
    #                          -> r5
    #                                -> client

    def router_args(*cli_tokens):
        # Each call builds a fresh parser, as the original code did.
        return set_router_parser().parse_args(list(cli_tokens))

    def reducer(*extra):
        # Reduce router: PULL_BIND in, PUSH_CONNECT out.
        return router_args(
            '--yaml_path', self.reduce_router_yaml,
            '--socket_in', str(SocketType.PULL_BIND),
            '--socket_out', str(SocketType.PUSH_CONNECT),
            *extra)

    def sub_publisher(upstream):
        # Publish router subscribed to `upstream`'s out-port.
        return router_args(
            '--yaml_path', self.publish_router_yaml,
            '--socket_in', str(SocketType.SUB_CONNECT),
            '--socket_out', str(SocketType.PUB_BIND),
            '--port_in', str(upstream.port_out))

    def relay(upstream, downstream):
        # BaseRouter reading from `upstream` and pushing to `downstream`.
        return router_args(
            '--socket_in', str(SocketType.SUB_CONNECT),
            '--socket_out', str(SocketType.PUSH_CONNECT),
            '--port_in', str(upstream.port_out),
            '--port_out', str(downstream.port_in),
            '--yaml_path', 'BaseRouter')

    # Parsers are created in the same order as before: p1, r5, r41, r42,
    # p21, p22, r311, r312, r321, r322, then the client.
    p1 = router_args(
        '--yaml_path', self.publish_router_yaml,
        '--socket_in', str(SocketType.PULL_CONNECT),
        '--socket_out', str(SocketType.PUB_BIND))
    r5 = reducer()
    r41 = reducer('--port_out', str(r5.port_in))
    r42 = reducer('--port_out', str(r5.port_in))
    p21 = sub_publisher(p1)
    p22 = sub_publisher(p1)
    r311 = relay(p21, r41)
    r312 = relay(p21, r41)
    r321 = relay(p22, r42)
    r322 = relay(p22, r42)

    # The client feeds p1's in-port and reads from r5's out-port.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(r5.port_out),
        '--port_out', str(p1.port_in),
        '--socket_in', str(SocketType.PULL_BIND),
        '--socket_out', str(SocketType.PUSH_BIND),
    ])

    with RouterService(p1), RouterService(r5), \
            RouterService(p21), RouterService(p22), \
            RouterService(r311), RouterService(r312), \
            RouterService(r321), RouterService(r322), \
            RouterService(r41), RouterService(r42), \
            ZmqClient(c_args) as c1:
        outgoing = gnes_pb2.Message()
        outgoing.envelope.num_part.append(1)
        c1.send_message(outgoing)

        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        print(reply.envelope.routes)