def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """
    Start a plasma store plus a transfer test pool and populate them with chunks.

    For every index ``idx`` of ``chunk_keys`` an int16 array of 640 * 1024
    elements filled with ``idx`` is generated. The last seven chunks (all of
    them when fewer than seven keys are given) are put into the plasma store
    and registered with the chunk holder actor; every earlier chunk is written
    as a spill file. Afterwards the plasma socket is published on
    ``msg_queue`` and the function keeps polling that queue.

    :param pool_address: address the transfer test pool is started on
    :param session_id: session id used when putting and registering chunks
    :param chunk_keys: sequence of chunk keys to generate data for
    :param spill_dir: value assigned to ``options.worker.spill_directory``
    :param msg_queue: queue used to publish the plasma socket and polled afterwards
    :raises SystemError: when the queue is found empty more than 60 seconds
        after the plasma socket has been published
    """
    options.worker.spill_directory = spill_dir
    plasma_size = 1024 * 1024 * 10

    # Index of the first chunk that goes into plasma instead of a spill file.
    # Clamped at zero: a negative split point would make the plasma loop walk
    # negative indices, which either fail with IndexError or wrap around and
    # store the same chunks twice.
    plasma_start = max(len(chunk_keys) - 7, 0)

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        options.worker.plasma_socket = plasma_socket = store_args[0]
        plasma_client = plasma.connect(plasma_socket, '', 0)

        with start_transfer_test_pool(address=pool_address, plasma_size=plasma_size) as pool:
            chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
            mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
            plasma_store = PlasmaChunkStore(plasma_client, mapper_ref)

            # two sender / receiver pairs, each under a random uid
            for _ in range(2):
                pool.create_actor(SenderActor, uid=str(uuid.uuid4()))
                pool.create_actor(ReceiverActor, uid=str(uuid.uuid4()))

            # spilled chunks come first, plasma chunks afterwards; a single
            # ordered pass keeps that sequence of side effects
            for idx, chunk_key in enumerate(chunk_keys):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                if idx < plasma_start:
                    write_spill_file(chunk_key, data)
                else:
                    plasma_store.put(session_id, chunk_key, data)
                    chunk_holder_ref.register_chunk(session_id, chunk_key)

            # tell the other side which plasma socket this worker uses
            msg_queue.put(plasma_socket)
            t = time.time()
            # NOTE(review): a message that is fetched successfully is discarded
            # and polling simply continues; within this function the loop only
            # ends through the timeout error below. Presumably the process is
            # terminated from outside once the test is done -- confirm.
            while True:
                try:
                    msg_queue.get_nowait()
                except Empty:
                    if time.time() > t + 60:
                        raise SystemError('Transfer finish timed out.')
                    pool.sleep(0.1)
def testReceiver(self):
    """
    Exercise ReceiverActor through a sequence of receive scenarios.

    Covered in order: status checks for spilled and stored chunks, data
    writer creation for received / new / in-progress chunks, checksum
    failures on ``receive_data_part`` and ``finish_receive``, cancellation in
    the middle of a transfer, a complete transfer, transfers while
    ``PlasmaChunkStore.create`` is patched to raise ``StoreFull`` or
    ``SpillNotConfigured``, and a receive that times out.

    Results and errors produced by promises are handed to the test actor via
    ``set_result`` and collected with ``self.get_result``. The scenarios share
    chunk keys and receiver state, so their order matters.
    """
    pool_addr = 'localhost:%d' % get_next_port()
    # spill directory name is derived from the pid and the id of run_transfer_worker
    options.worker.spill_directory = os.path.join(
        tempfile.gettempdir(), 'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    serialized_mock_data = dataserializer.dumps(mock_data)
    # checksum over the whole serialized payload
    serialized_crc32 = zlib.crc32(serialized_mock_data)

    # one random key per scenario group below
    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())
    chunk_key3 = str(uuid.uuid4())
    chunk_key4 = str(uuid.uuid4())
    chunk_key5 = str(uuid.uuid4())
    chunk_key6 = str(uuid.uuid4())

    with start_transfer_test_pool(address=pool_addr, plasma_size=self.plasma_storage_size) as pool:
        chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
        mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
        receiver_ref = pool.create_actor(ReceiverActor, uid=str(uuid.uuid4()))

        store = PlasmaChunkStore(self._plasma_client, mapper_ref)

        # check_status on receiving and received:
        # nothing exists for chunk_key1 yet, so NOT_STARTED is expected
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.NOT_STARTED)

        # once a spill file exists for the key, RECEIVED is expected
        write_spill_file(chunk_key1, mock_data)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.RECEIVED)
        os.unlink(build_spill_file_name(chunk_key1))

        # same expectation with the data put into the store and registered
        # with the chunk holder instead of being spilled
        ref = store.put(session_id, chunk_key1, mock_data)
        # data_size is reused as the size argument of every create_data_writer call below
        data_size = store.get_actual_size(session_id, chunk_key1)
        chunk_holder_ref.register_chunk(session_id, chunk_key1)
        # drop the local reference returned by put
        # (presumably so the test itself does not keep the object pinned -- confirm)
        del ref
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.RECEIVED)

        with self.run_actor_test(pool) as test_actor:
            receiver_ref_p = test_actor.promise_ref(receiver_ref)

            # cancel on an un-run / missing result will result in nothing
            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # start creating writer
            # chunk_key1 is already present: the reply carries ReceiveStatus.RECEIVED
            receiver_ref_p.create_data_writer(session_id, chunk_key1, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, ReceiveStatus.RECEIVED))

            # chunk_key2 is new: the first request replies with status None ...
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            # ... and a second request for the same key replies with RECEIVING
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, ReceiveStatus.RECEIVING))

            # cancelling puts chunk_key2 back to NOT_STARTED
            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                             ReceiveStatus.NOT_STARTED)

            # test checksum error on receive_data_part
            # the whole payload is sent with checksum 0 instead of its crc32
            # NOTE(review): unlike the following scenarios, the writer-creation
            # result is not consumed with get_result before the failure is
            # awaited -- confirm this is intentional.
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.receive_data_part(session_id, chunk_key2, serialized_mock_data, 0)

            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            # test checksum error on finish_receive
            # the data part carries the right checksum, finish_receive is given 0
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.receive_data_part(session_id, chunk_key2, serialized_mock_data, serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key2, 0)

            # the callback is registered after finish_receive and is still
            # expected to deliver the checksum error
            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            # clear the state of chunk_key2 before it is reused below
            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # test intermediate cancellation
            # the first 64 bytes are sent, then the receive is cancelled; the
            # finish callback is expected to fail with ExecutionInterrupted
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.receive_data_part(session_id, chunk_key2, serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            # remaining bytes are sent after the cancellation
            receiver_ref_p.receive_data_part(session_id, chunk_key2, serialized_mock_data[64:],
                                             serialized_crc32)
            with self.assertRaises(ExecutionInterrupted):
                self.get_result(5)

            # test transfer in memory
            # here the finish callback is registered before the writer is created
            receiver_ref_p.register_finish_callback(session_id, chunk_key3, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            # payload sent in two parts: the first checksum covers the first
            # 64 bytes only, the second is the crc32 of the whole payload
            receiver_ref_p.receive_data_part(session_id, chunk_key3, serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.receive_data_part(session_id, chunk_key3, serialized_mock_data[64:],
                                             serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key3, serialized_crc32)

            # a successful finish resolves the callback with no arguments
            self.assertTupleEqual((), self.get_result(5))

            # asking for a writer again now reports the chunk as RECEIVED
            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, ReceiveStatus.RECEIVED))

            # test transfer in spill file
            # PlasmaChunkStore.create is patched to always raise StoreFull;
            # writers are requested with ensure_cached=False
            def mocked_store_create(*_):
                raise StoreFull

            with patch_method(PlasmaChunkStore.create, new=mocked_store_create):
                # test receive aborted
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor, ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

                receiver_ref_p.receive_data_part(session_id, chunk_key4, serialized_mock_data[:64],
                                                 zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.cancel_receive(session_id, chunk_key4)
                with self.assertRaises(ExecutionInterrupted):
                    self.get_result(5)

                # test receive into spill
                # same key again, this time the transfer is completed
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor, ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

                receiver_ref_p.receive_data_part(session_id, chunk_key4, serialized_mock_data, serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key4, serialized_crc32)
                self.assertTupleEqual((), self.get_result(5))

            # test intermediate error
            # create now raises SpillNotConfigured; the error is expected to
            # reach the rejection handler passed as second argument of then()
            def mocked_store_create(*_):
                raise SpillNotConfigured

            with patch_method(PlasmaChunkStore.create, new=mocked_store_create):
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key5, data_size, test_actor, ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False),
                          lambda *s: test_actor.set_result(s, accept=False, destroy=False))

                with self.assertRaises(SpillNotConfigured):
                    self.get_result(5)

            # test receive timeout
            # the writer is created with timeout=2 and only the first part is
            # ever sent; the finish callback is expected to fail with TimeoutError
            receiver_ref_p.register_finish_callback(session_id, chunk_key6, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key6, data_size, test_actor,
                                              timeout=2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))
            receiver_ref_p.receive_data_part(session_id, chunk_key6, serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))

            with self.assertRaises(TimeoutError):
                self.get_result(5)
def testSender(self):
    """
    Exercise SenderActor.send_data against mock receivers.

    Covered in order: sending a chunk that does not exist, sending from a
    spill file, sending from the plasma store, sending to two targets
    (including a repeated request for the same chunk), sending to an address
    whose pool has already been shut down, and a receiver whose
    ``receive_data_part`` raises ``ChecksumMismatch``.

    Results and errors produced by promises are handed to the test actor via
    ``set_result`` and collected with ``self.get_result``.
    """
    send_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr2 = 'localhost:%d' % get_next_port()

    # spill directory name is derived from the pid and the id of run_transfer_worker
    options.worker.spill_directory = os.path.join(
        tempfile.gettempdir(), 'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))

    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())

    @contextlib.contextmanager
    def start_send_recv_pool():
        # Starts the sending pool (with a SenderActor) and, nested inside it,
        # the receiving pool (with a MockReceiverActor registered under the
        # ReceiverActor default name); yields both pools as a pair.
        with start_transfer_test_pool(
                address=send_pool_addr, plasma_size=self.plasma_storage_size) as sp:
            sp.create_actor(SenderActor, uid=SenderActor.default_name())
            with start_transfer_test_pool(
                    address=recv_pool_addr, plasma_size=self.plasma_storage_size) as rp:
                rp.create_actor(MockReceiverActor, uid=ReceiverActor.default_name())
                yield sp, rp

    with start_send_recv_pool() as (send_pool, recv_pool):
        chunk_holder_ref = send_pool.actor_ref(ChunkHolderActor.default_name())
        sender_ref = send_pool.actor_ref(SenderActor.default_name())
        receiver_ref = recv_pool.actor_ref(ReceiverActor.default_name())
        sender_mapper_ref = send_pool.actor_ref(PlasmaKeyMapActor.default_name())

        # store bound to the sending pool's key mapper
        store = PlasmaChunkStore(self._plasma_client, sender_mapper_ref)

        with self.run_actor_test(send_pool) as test_actor:
            # send when data missing
            # a fresh random key has neither a spill file nor a stored chunk
            sender_ref_p = test_actor.promise_ref(sender_ref)
            sender_ref_p.send_data(session_id, str(uuid.uuid4()), recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            with self.assertRaises(DependencyMissing):
                self.get_result(5)

            # send data in spill
            write_spill_file(chunk_key1, mock_data)
            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            # wait for the send to finish, then compare what the mock receiver got
            self.get_result(5)
            assert_array_equal(mock_data, receiver_ref.get_result_data(session_id, chunk_key1))
            # remove the spill file so the next scenario uses the plasma store copy
            os.unlink(build_spill_file_name(chunk_key1))

            # send data in plasma store
            store.put(session_id, chunk_key1, mock_data)
            chunk_holder_ref.register_chunk(session_id, chunk_key1)
            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            self.get_result(5)
            assert_array_equal(mock_data, receiver_ref.get_result_data(session_id, chunk_key1))

            # send data to multiple targets
            # a second receiving pool only lives for the duration of this block
            with start_transfer_test_pool(
                    address=recv_pool_addr2, plasma_size=self.plasma_storage_size) as rp2:
                recv_ref2 = rp2.create_actor(MockReceiverActor, uid=ReceiverActor.default_name())

                # first request is fired without waiting for its result
                sender_ref_p.send_data(session_id, chunk_key1,
                                       [recv_pool_addr, recv_pool_addr2], _promise=True)
                # send data to already transferred / transferring
                sender_ref_p.send_data(session_id, chunk_key1,
                                       [recv_pool_addr, recv_pool_addr2], _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                self.get_result(5)
                assert_array_equal(mock_data, recv_ref2.get_result_data(session_id, chunk_key1))

            # send data to non-exist endpoint which causes error
            # recv_pool_addr2 is no longer served once the block above has exited
            store.put(session_id, chunk_key2, mock_data)
            chunk_holder_ref.register_chunk(session_id, chunk_key2)

            sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            with self.assertRaises(BrokenPipeError):
                self.get_result(5)

            # a receiver that rejects every data part: the error is expected
            # to surface on the sender's promise
            def mocked_receive_data_part(*_):
                raise ChecksumMismatch

            with patch_method(MockReceiverActor.receive_data_part, new=mocked_receive_data_part):
                sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                with self.assertRaises(ChecksumMismatch):
                    self.get_result(5)