def test_singleton_preprocessor_service_realdata(self):
    """Index every image of ``imgs/test.zip`` through the singleton preprocessor.

    Each response must be routed first through
    ``PreprocessorService:BaseSingletonPreprocessor`` and every returned
    document must hold exactly one chunk whose decoded blob has shape
    ``(224, 224, 3)``.
    """
    args = set_preprocessor_service_parser().parse_args(
        ['--yaml_path', self.singleton_img_pre_yaml])
    # The client talks to the service with the two ports swapped.
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    # Read all members up front and close the archive before the services
    # start; the original left the ZipFile and its member handles open.
    with zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) as archive:
        all_bytes = [archive.read(name) for name in archive.namelist()]

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(all_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            self.assertEqual(
                r.envelope.routes[0].service,
                'PreprocessorService:BaseSingletonPreprocessor')
            for d in r.request.index.docs:
                self.assertEqual(len(d.chunks), 1)
                # Decode once instead of once per assertion.
                img = blob2array(d.chunks[0].blob)
                self.assertEqual(img.shape, (224, 224, 3))
                print(img.dtype)
def test_preprocessor_service_realdata(self):
    """Send the ``26-doc-chinese.txt`` fixture through train, index and search.

    Every non-blank line of the file becomes one training document; the same
    documents are then re-sent as an index request, and finally the
    concatenation of those lines is sent as a single search query. Each
    response is printed; nothing is asserted about its content.
    """
    args = set_preprocessor_service_parser().parse_args([])
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])
    corpus_path = os.path.join(self.dirname, '26-doc-chinese.txt')

    with open(corpus_path, 'r', encoding='utf8') as fp:
        train_msg = gnes_pb2.Message()
        # Keep lines verbatim (including their newline); only blank ones are dropped.
        kept_lines = [line for line in fp if line.strip()]
        for line in kept_lines:
            train_msg.request.train.docs.add().raw_text = line
        all_text = ''.join(kept_lines)

        with PreprocessorService(args), ZmqClient(c_args) as client:
            # Round 1: training request.
            client.send_message(train_msg)
            print(client.recv_message())

            # Round 2: the same documents as an index request.
            index_msg = gnes_pb2.Message()
            index_msg.request.index.docs.extend(train_msg.request.train.docs)
            client.send_message(index_msg)
            print(client.recv_message())

            # Round 3: the whole text as one search query.
            search_msg = gnes_pb2.Message()
            search_msg.request.search.query.raw_text = all_text
            client.send_message(search_msg)
            print(client.recv_message())
def test_concat_router(self):
    """Check that the concat router merges three parts along the last axis.

    A message tagged ``num_part == [1, 3]`` is sent three times. The single
    reply must carry ``num_part == [1]`` and embeddings whose last dimension
    is three times the input's: ``[5, 2] -> [5, 6]`` for the search query and
    ``[5, 2 * j] -> [5, 6 * j]`` for each indexed document.
    """
    router_cli = [
        '--yaml_path', self.concat_router_yaml,
        '--socket_out', str(SocketType.PUSH_BIND),
    ]
    args = set_router_service_parser().parse_args(router_cli)
    client_cli = [
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in),
        '--socket_in', str(SocketType.PULL_CONNECT),
    ]
    c_args = _set_client_parser().parse_args(client_cli)

    with RouterService(args), ZmqClient(c_args) as c1:
        # --- search request -------------------------------------------------
        msg = gnes_pb2.Message()
        query_blob = array2blob(np.random.random([5, 2]))
        msg.request.search.query.chunk_embeddings.CopyFrom(query_blob)
        msg.envelope.num_part.extend([1, 3])
        for _ in range(3):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        print(reply.envelope.routes)
        self.assertEqual(reply.request.search.query.chunk_embeddings.shape, [5, 6])

        # --- index request (same message object is reused) ------------------
        for width in (2, 4, 6):
            doc = msg.request.index.docs.add()
            doc.chunk_embeddings.CopyFrom(array2blob(np.random.random([5, width])))
        for _ in range(3):
            c1.send_message(msg)

        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        # Index explicitly so a missing document raises instead of being skipped.
        for pos in range(3):
            self.assertEqual(
                reply.request.index.docs[pos].chunk_embeddings.shape,
                [5, 6 * (pos + 1)])
def test_chunk_reduce_router(self):
    """Check how the chunk-reduce router merges two partial search responses.

    Two responses tagged ``num_part == [1, 2]`` are sent, each with three
    chunk-level results. The merged reply must have ``num_part == [1]`` and
    three results whose ``score_explained`` strings and scores (0.6, 0.4,
    0.3) match the expectations asserted below.
    """
    args = set_router_service_parser().parse_args([
        '--yaml_path', self.chunk_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ])
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    # (score, score_explained, chunk.doc_id) for each partial response.
    first_part = [(0.1, '1-c1', 1), (0.2, '1-c2', 2), (0.3, '1-c3', 1)]
    second_part = [(0.2, '2-c1', 1), (0.2, '2-c2', 2), (0.3, '2-c3', 3)]

    def add_results(message, rows):
        # Append one scored chunk result per row to the search response.
        for score, explained, doc_id in rows:
            hit = message.response.search.topk_results.add()
            hit.score = score
            hit.score_explained = explained
            hit.chunk.doc_id = doc_id

    with RouterService(args), ZmqClient(c_args) as c1:
        msg = gnes_pb2.Message()
        add_results(msg, first_part)
        msg.envelope.num_part.extend([1, 2])
        c1.send_message(msg)

        # Reuse the envelope; only the result list is swapped out.
        msg.response.search.ClearField('topk_results')
        add_results(msg, second_part)
        c1.send_message(msg)

        r = c1.recv_message()
        self.assertSequenceEqual(r.envelope.num_part, [1])
        merged = r.response.search.topk_results
        self.assertEqual(len(merged), 3)
        self.assertGreaterEqual(merged[0].score, merged[-1].score)
        print(r.response.search.topk_results)

        expected_explained = ['1-c1\n1-c3\n2-c1\n', '1-c2\n2-c2\n', '2-c3\n']
        for rank, explained in enumerate(expected_explained):
            self.assertEqual(merged[rank].score_explained, explained)

        expected_scores = [0.6, 0.4, 0.3]
        for rank, score in enumerate(expected_scores):
            self.assertAlmostEqual(merged[rank].score, score)
def test_doc_reduce_router(self):
    """Check that the doc-reduce router merges two partial search responses.

    Two responses tagged as parts of a two-way split are sent; the merged
    reply must carry ``num_part == [1]``, contain three results, and have its
    first score greater than or equal to its last.
    """
    args = set_router_service_parser().parse_args([
        '--yaml_path', self.doc_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND)
    ])
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT)
    ])
    with RouterService(args), ZmqClient(c_args) as c1:
        msg = gnes_pb2.Message()

        # shard1 only has d1
        s = msg.response.search.topk_results.add()
        s.score = 0.1
        s.doc.doc_id = 1
        s.doc.raw_text = 'd1'

        s = msg.response.search.topk_results.add()
        s.score = 0.2
        s.doc.doc_id = 2

        s = msg.response.search.topk_results.add()
        s.score = 0.3
        # NOTE(review): this sets chunk.doc_id although the other results use
        # doc.doc_id -- possibly a typo for s.doc.doc_id; left unchanged, confirm.
        s.chunk.doc_id = 3

        # num_part is handled as a repeated field by the other router tests
        # (extend/append + assertSequenceEqual), so it is extended here rather
        # than assigned a scalar. The envelope is reused for the second send,
        # so it is set only once.
        msg.envelope.num_part.extend([1, 2])
        c1.send_message(msg)

        msg.response.search.ClearField('topk_results')

        # shard2 has d2 and d3
        s = msg.response.search.topk_results.add()
        s.score = 0.1
        s.doc.doc_id = 1

        s = msg.response.search.topk_results.add()
        s.score = 0.2
        s.doc.doc_id = 2
        s.doc.raw_text = 'd2'

        s = msg.response.search.topk_results.add()
        s.score = 0.3
        # NOTE(review): same chunk/doc mix as in shard1 above -- confirm intent.
        s.chunk.doc_id = 3
        s.doc.raw_text = 'd3'

        msg.response.search.top_k = 5
        c1.send_message(msg)

        r = c1.recv_message()
        print(r.response.search.topk_results)
        self.assertSequenceEqual(r.envelope.num_part, [1])
        self.assertEqual(len(r.response.search.topk_results), 3)
        self.assertGreaterEqual(r.response.search.topk_results[0].score,
                                r.response.search.topk_results[-1].score)
def test_publish_router(self):
    """Check that both subscribers of the publish router get the same envelope.

    One index message with ``num_part == [1]`` is sent; each of the two
    subscribed clients must receive a message whose ``num_part`` is
    ``[1, 2]``.
    """
    router_cli = [
        '--yaml_path', self.publish_router_yaml,
        '--socket_out', str(SocketType.PUB_BIND),
    ]
    args = set_router_service_parser().parse_args(router_cli)
    # Both clients share one argument set: same ports, SUB_CONNECT input.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in),
        '--socket_in', str(SocketType.SUB_CONNECT),
    ])

    with RouterService(args), ZmqClient(c_args) as c1, ZmqClient(c_args) as c2:
        outgoing = gnes_pb2.Message()
        blank_docs = [gnes_pb2.Document() for _ in range(5)]
        outgoing.request.index.docs.extend(blank_docs)
        outgoing.envelope.num_part.append(1)
        c1.send_message(outgoing)

        # The sender is itself a subscriber, so it is checked first.
        for subscriber in (c1, c2):
            received = subscriber.recv_message()
            self.assertSequenceEqual(received.envelope.num_part, [1, 2])
def test_preprocessor_service_echo(self):
    """Smoke-test the default preprocessor with empty documents.

    Five empty documents are added to the index request and the message is
    sent; then five more are added to the train request of the *same*
    message and it is sent again. Each reply is printed; nothing is asserted.
    """
    args = set_preprocessor_service_parser().parse_args([])
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in),
    ])

    with PreprocessorService(args), ZmqClient(c_args) as client:
        msg = gnes_pb2.Message()
        # The message object is deliberately reused across both rounds.
        for request_kind in ('index', 'train'):
            target = getattr(msg.request, request_kind)
            target.docs.extend([gnes_pb2.Document() for _ in range(5)])
            client.send_message(msg)
            reply = client.recv_message()
            print(reply)
def test_map_router(self):
    """Check that the batch router splits five documents into 2 + 2 + 1.

    One index message with five empty documents is sent; three replies are
    read back and must contain two, two and one document respectively.
    """
    args = set_router_service_parser().parse_args(
        ['--yaml_path', self.batch_router_yaml])
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    with RouterService(args), ZmqClient(c_args) as c1:
        request = gnes_pb2.Message()
        request.request.index.docs.extend(
            [gnes_pb2.Document() for _ in range(5)])
        c1.send_message(request)

        # Replies are consumed in arrival order.
        for expected_size in (2, 2, 1):
            batch = c1.recv_message()
            self.assertEqual(len(batch.request.index.docs), expected_size)
def test_video_preprocessor_service_realdata(self):
    """Index every file under ``self.video_path`` through the video preprocessor.

    Every returned document must contain at least one chunk, and every chunk
    blob must decode to an array of shape ``(168, 192, 3)``.
    """
    args = set_preprocessor_service_parser().parse_args([
        '--yaml_path', self.yml_path
    ])
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in)
    ])

    # Read each file inside a context manager so no handle is left open
    # (the original relied on garbage collection to close them).
    video_bytes = []
    for file_name in os.listdir(self.video_path):
        with open(os.path.join(self.video_path, file_name), 'rb') as fp:
            video_bytes.append(fp.read())

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(video_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            for d in r.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                for chunk in d.chunks:
                    shape = blob2array(chunk.blob).shape
                    self.assertEqual(shape, (168, 192, 3))
def test_multimap_multireduce(self):
    """Send one message through a two-level fan-out / two-level fan-in graph.

    Wiring, as set up by the ports and socket types below::

        client -> p1 -> p21 -> r311, r312 -> r41 -> r5 -> client
                     -> p22 -> r321, r322 -> r42 -> r5 -> client

    ``p1``, ``p21`` and ``p22`` use the publish-router config; ``r41``,
    ``r42`` and ``r5`` use the reduce-router config; the ``r3xx`` routers are
    created without a ``--yaml_path``. The message enters with
    ``num_part == [1]`` and the reply read from ``r5`` must carry
    ``num_part == [1]``.
    """
    pull_bind = str(SocketType.PULL_BIND)
    pull_connect = str(SocketType.PULL_CONNECT)
    push_bind = str(SocketType.PUSH_BIND)
    push_connect = str(SocketType.PUSH_CONNECT)
    pub_bind = str(SocketType.PUB_BIND)
    sub_connect = str(SocketType.SUB_CONNECT)

    def router_args(*cli):
        # Each call builds a fresh parser, exactly as the inline form did.
        return set_router_service_parser().parse_args(list(cli))

    def reducer(*extra):
        # Reduce router: binds a PULL input, connects a PUSH output.
        return router_args('--yaml_path', self.reduce_router_yaml,
                           '--socket_in', pull_bind,
                           '--socket_out', push_connect,
                           *extra)

    def sub_publisher(upstream):
        # Publish router subscribed to ``upstream``'s output port.
        return router_args('--yaml_path', self.publish_router_yaml,
                           '--socket_in', sub_connect,
                           '--socket_out', pub_bind,
                           '--port_in', str(upstream.port_out))

    def relay(upstream, downstream):
        # Router without a yaml config, subscribed to ``upstream`` and
        # pushing into ``downstream``.
        return router_args('--socket_in', sub_connect,
                           '--socket_out', push_connect,
                           '--port_in', str(upstream.port_out),
                           '--port_out', str(downstream.port_in))

    # Argument sets are created in the same order as before: entry point,
    # final reducer, mid-level reducers, second-level publishers, relays.
    p1 = router_args('--yaml_path', self.publish_router_yaml,
                     '--socket_in', pull_connect,
                     '--socket_out', pub_bind)
    r5 = reducer()
    r41 = reducer('--port_out', str(r5.port_in))
    r42 = reducer('--port_out', str(r5.port_in))
    p21 = sub_publisher(p1)
    p22 = sub_publisher(p1)
    r311 = relay(p21, r41)
    r312 = relay(p21, r41)
    r321 = relay(p22, r42)
    r322 = relay(p22, r42)

    # The client binds both ends: it pushes into p1 and pulls from r5.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(r5.port_out),
        '--port_out', str(p1.port_in),
        '--socket_in', pull_bind,
        '--socket_out', push_bind,
    ])

    with RouterService(p1), RouterService(r5), \
            RouterService(p21), RouterService(p22), \
            RouterService(r311), RouterService(r312), \
            RouterService(r321), RouterService(r322), \
            RouterService(r41), RouterService(r42), \
            ZmqClient(c_args) as c1:
        probe = gnes_pb2.Message()
        probe.envelope.num_part.append(1)
        c1.send_message(probe)
        reply = c1.recv_message()
        self.assertSequenceEqual(reply.envelope.num_part, [1])
        print(reply.envelope.routes)