def testOperandPrepush(self):
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())
    mock_workers = ['localhost:12345']

    with self._prepare_test_graph(session_id, graph_key, mock_workers) as (pool, graph_ref):
        input_op_keys, mid_op_key, output_op_keys = \
            self._filter_graph_level_op_keys(graph_ref)
        fake_exec_ref = pool.create_actor(FakeExecutionActor, 0.5)

        input_refs = [pool.actor_ref(OperandActor.gen_uid(session_id, k))
                      for k in input_op_keys]
        mid_ref = pool.actor_ref(OperandActor.gen_uid(session_id, mid_op_key))

        def _fake_raw_execution_ref(*_, **__):
            return fake_exec_ref

        with patch_method(OperandActor._get_raw_execution_ref, new=_fake_raw_execution_ref), \
                patch_method(AssignerActor.get_worker_assignments, new=lambda *_: mock_workers):
            input_refs[0].start_operand(OperandState.READY)
            input_refs[1].start_operand(OperandState.READY)

            start_time = time.time()
            # submission without pre-push will fail
            while mid_ref.get_state() != OperandState.FINISHED:
                pool.sleep(0.1)
                if time.time() - start_time > 30:
                    raise TimeoutError('Check middle chunk state timed out.')
def testErrorOnPrepare(self, *_):
    session_id = str(uuid.uuid4())
    addr = '127.0.0.1:%d' % get_next_port()

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())

        resource_ref.set_worker_meta('localhost:12345', dict(hardware=dict(cpu_total=4)))
        resource_ref.set_worker_meta('localhost:23456', dict(hardware=dict(cpu_total=4)))

        # error occurred in create_operand_actors
        graph_key = str(uuid.uuid4())
        expr = mt.random.random((8, 2), chunk_size=2) + 1
        graph = expr.build_graph(compose=False)
        serialized_graph = serialize_graph(graph)

        graph_ref = pool.create_actor(GraphActor, session_id, graph_key, serialized_graph,
                                      uid=GraphActor.gen_name(session_id, graph_key))

        def _mock_raises(*_, **__):
            raise RuntimeError

        with patch_method(GraphActor.create_operand_actors, new=_mock_raises):
            with self.assertRaises(RuntimeError):
                graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.FAILED)
        graph_ref.destroy()

        # interrupted during create_operand_actors
        graph_key = str(uuid.uuid4())
        graph_ref = pool.create_actor(GraphActor, session_id, graph_key, serialized_graph,
                                      uid=GraphActor.gen_name(session_id, graph_key))

        def _mock_cancels(*_, **__):
            graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
            graph_meta_ref.set_state(GraphState.CANCELLING)

        with patch_method(GraphActor.create_operand_actors, new=_mock_cancels):
            graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)

        # interrupted during previous steps
        graph_key = str(uuid.uuid4())
        graph_ref = pool.create_actor(GraphActor, session_id, graph_key, serialized_graph,
                                      uid=GraphActor.gen_name(session_id, graph_key))

        def _mock_cancels(*_, **__):
            graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
            graph_meta_ref.set_state(GraphState.CANCELLING)
            return dict()

        with patch_method(GraphAnalyzer.calc_operand_assignments, new=_mock_cancels):
            graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)
def testDestroyCalcActor(self):
    import gevent.event

    with self._start_calc_pool() as (_pool, test_actor):
        calc_ref = _pool.actor_ref(CpuCalcActor.default_uid())
        calc_ref.mark_destroy()
        gevent.sleep(0.8)
        self.assertFalse(_pool.has_actor(calc_ref))

    with self._start_calc_pool() as (_pool, test_actor):
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(2)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)
        exec_graph2, fetch_chunks2, add_chunk2 = self._build_test_graph(data_list[::-1])

        storage_client = test_actor.storage_client

        for fetch_chunk, d in zip(fetch_chunks, data_list):
            self.waitp(
                storage_client.put_objects(
                    session_id, [fetch_chunk.key], [d],
                    [DataStorageDevice.SHARED_MEMORY]),
            )
        for fetch_chunk2, d in zip(fetch_chunks2, data_list[::-1]):
            self.waitp(
                storage_client.put_objects(
                    session_id, [fetch_chunk2.key], [d],
                    [DataStorageDevice.SHARED_MEMORY]),
            )

        orig_calc_results = CpuCalcActor._calc_results

        start_event = gevent.event.Event()

        def _mock_calc_delayed(actor_obj, *args, **kwargs):
            start_event.set()
            gevent.sleep(1)
            return orig_calc_results(actor_obj, *args, **kwargs)

        with patch_method(CpuCalcActor._calc_results, _mock_calc_delayed):
            p = calc_ref.calc(session_id, add_chunk.op.key, serialize_graph(exec_graph),
                              [add_chunk.key], _promise=True) \
                .then(lambda *_: calc_ref.store_results(
                    session_id, add_chunk.op.key, [add_chunk.key], None, _promise=True))
            start_event.wait()
            calc_ref.mark_destroy()

            p2 = calc_ref.calc(session_id, add_chunk2.op.key, serialize_graph(exec_graph2),
                               [add_chunk2.key], _promise=True) \
                .then(lambda *_: calc_ref.store_results(
                    session_id, add_chunk2.op.key, [add_chunk2.key], None, _promise=True))

            self.assertTrue(_pool.has_actor(calc_ref._ref))
            self.waitp(p)
            self.waitp(p2)

        gevent.sleep(0.8)
        self.assertFalse(_pool.has_actor(calc_ref._ref))
def testCpuCalcErrorInRunning(self):
    with self._start_calc_pool() as (_pool, test_actor):
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(2)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)

        storage_client = test_actor.storage_client

        for fetch_chunk, d in zip(fetch_chunks, data_list):
            self.waitp(
                storage_client.put_objects(
                    session_id, [fetch_chunk.key], [d],
                    [DataStorageDevice.SHARED_MEMORY]),
            )

        def _mock_calc_results_error(*_, **__):
            raise ValueError

        with patch_method(CpuCalcActor._calc_results, _mock_calc_results_error), \
                self.assertRaises(ValueError):
            self.waitp(
                calc_ref.calc(session_id, add_chunk.op.key, serialize_graph(exec_graph),
                              [add_chunk.key], _promise=True)
                    .then(lambda *_: calc_ref.store_results(
                        session_id, add_chunk.op.key, [add_chunk.key], None, _promise=True)))
def testReadyState(self, *_):
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())
    mock_workers = ['localhost:12345', 'localhost:23456']

    def _mock_get_workers_meta(*_, **__):
        return dict((w, dict(hardware=dict(cpu_total=1, memory=1024 ** 3)))
                    for w in mock_workers)

    with patch_method(ResourceActor.get_workers_meta, new=_mock_get_workers_meta) as _, \
            self._prepare_test_graph(session_id, graph_key, mock_workers) as (pool, graph_ref):
        input_op_keys, mid_op_key, output_op_keys = \
            self._filter_graph_level_op_keys(graph_ref)
        meta_client = ChunkMetaClient(
            pool, pool.actor_ref(SchedulerClusterInfoActor.default_uid()))
        op_ref = pool.actor_ref(OperandActor.gen_uid(session_id, mid_op_key))
        resource_ref = pool.actor_ref(ResourceActor.default_uid())

        input_refs = [pool.actor_ref(OperandActor.gen_uid(session_id, k))
                      for k in input_op_keys]

        def test_entering_state(target):
            for key in input_op_keys:
                op_ref.remove_finished_predecessor(key)

            op_ref.start_operand(OperandState.UNSCHEDULED)
            for ref in input_refs:
                ref.start_operand(OperandState.UNSCHEDULED)

            for ref in input_refs:
                self.assertEqual(op_ref.get_state(), OperandState.UNSCHEDULED)
                ref.start_operand(OperandState.FINISHED)

            pool.sleep(1)
            self.assertEqual(target, op_ref.get_state())
            for w in mock_workers:
                resource_ref.deallocate_resource(session_id, mid_op_key, w)

        # test entering state with no input meta
        test_entering_state(OperandState.UNSCHEDULED)

        # fill meta
        input_chunk_keys, _, _ = self._filter_graph_level_chunk_keys(graph_ref)
        for ck in input_chunk_keys:
            meta_client.set_chunk_meta(session_id, ck,
                                       workers=('localhost:12345',), size=800)

        # test successful entering state
        test_entering_state(OperandState.READY)
def testReadyState(self, *_):
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())
    mock_workers = ['localhost:12345', 'localhost:23456']

    with self._prepare_test_graph(session_id, graph_key, mock_workers) as (pool, graph_ref):
        input_op_keys, mid_op_key, output_op_keys = \
            self._filter_graph_level_op_keys(graph_ref)
        meta_client = ChunkMetaClient(
            pool, pool.actor_ref(SchedulerClusterInfoActor.default_name()))
        op_ref = pool.actor_ref(OperandActor.gen_uid(session_id, mid_op_key))

        input_refs = [pool.actor_ref(OperandActor.gen_uid(session_id, k))
                      for k in input_op_keys]

        def test_entering_state(target):
            for key in input_op_keys:
                op_ref.remove_finished_predecessor(key)

            op_ref.start_operand(OperandState.UNSCHEDULED)
            for ref in input_refs:
                ref.start_operand(OperandState.UNSCHEDULED)

            for ref in input_refs:
                self.assertEqual(op_ref.get_state(), OperandState.UNSCHEDULED)
                ref.start_operand(OperandState.FINISHED)

            pool.sleep(0.5)
            self.assertEqual(target, op_ref.get_state())

        # test entering state with no input meta
        test_entering_state(OperandState.UNSCHEDULED)

        # fill meta
        input_chunk_keys, _, _ = self._filter_graph_level_chunk_keys(graph_ref)
        for ck in input_chunk_keys:
            meta_client.set_chunk_meta(session_id, ck,
                                       workers=('localhost:12345',), size=800)

        # test entering state with failure in fetching sizes
        with patch_method(ChunkMetaClient.batch_get_chunk_size,
                          new=lambda *_: [None, None]):
            test_entering_state(OperandState.UNSCHEDULED)

        # test successful entering state
        test_entering_state(OperandState.READY)
def testMemQuotaAllocation(self):
    from mars import resource
    from mars.utils import AttributeDict

    mock_mem_stat = AttributeDict(dict(total=300, available=50, used=0, free=50))
    local_pool_addr = 'localhost:%d' % get_next_port()

    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
            patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
        pool.create_actor(WorkerClusterInfoActor, schedulers=[local_pool_addr],
                          uid=WorkerClusterInfoActor.default_name())
        pool.create_actor(StatusActor, local_pool_addr,
                          uid=StatusActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_name())
        quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1,
                                      uid=MemQuotaActor.default_name())

        time_recs = []
        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(quota_ref)
            time_recs.append(time.time())

            def actual_exec(x):
                ref.release_quota(x)
                time_recs.append(time.time())
                test_actor.set_result(None)

            ref.request_quota('req', 100, _promise=True) \
                .then(functools.partial(actual_exec, 'req'))

            pool.sleep(0.5)
            mock_mem_stat['available'] = 150
            mock_mem_stat['free'] = 150
            self.get_result(2)

        self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
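# --- Illustrative sketch, not part of the original suite ---
# The test above shows the promise round-trip these tests lean on: an actor
# call issued with _promise=True resolves into test_actor.set_result(...),
# failures land in set_result(..., accept=False), and self.get_result(timeout)
# blocks until either happens. A hypothetical helper condensing that pattern
# (the name _wait_promise is ours; everything it calls appears in this suite):
def _wait_promise(self, test_actor, promised_call, timeout=5):
    promised_call \
        .then(lambda *s: test_actor.set_result(s)) \
        .catch(lambda *exc: test_actor.set_result(exc, accept=False))
    # raises whatever exception the promise was rejected with
    return self.get_result(timeout)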
def testServiceArgs(self):
    svc = WorkerService(ignore_avail_mem=True)
    self.assertGreaterEqual(svc._cache_mem_size, 0)
    self.assertIsInstance(svc._soft_mem_limit, int)
    self.assertIsInstance(svc._hard_mem_limit, int)
    self.assertIsInstance(svc._cache_mem_size, int)

    svc = WorkerService(ignore_avail_mem=True, total_mem=256 * 1024 * 1024)
    self.assertEqual(svc._total_mem, 256 * 1024 ** 2)
    svc = WorkerService(ignore_avail_mem=True, total_mem='512m')
    self.assertEqual(svc._total_mem, 512 * 1024 ** 2)

    with self.assertRaises(MemoryError):
        WorkerService(soft_mem_limit='128m', cache_mem_size='256m')
    with self.assertRaises(MemoryError), \
            patch_method(WorkerService._get_plasma_size, new=lambda *_, **__: 0):
        WorkerService(min_cache_mem_size='1g', cache_mem_size='256m')

    svc = WorkerService(ignore_avail_mem=True, spill_dirs='/tmp/a',
                        min_cache_mem_size=0)
    self.assertListEqual(svc._spill_dirs, ['/tmp/a'])

    svc = WorkerService(ignore_avail_mem=True, n_cpu_process=4, n_net_process=2,
                        min_cache_mem_size=0)
    self.assertEqual(svc.n_process, 7)

    svc = WorkerService(ignore_avail_mem=True, n_cpu_process=4, n_net_process=2,
                        spill_dirs='/tmp/a', min_cache_mem_size=0)
    self.assertEqual(svc.n_process, 8)

    svc = WorkerService(ignore_avail_mem=True, n_cpu_process=4, n_net_process=2,
                        spill_dirs=['/tmp/a', '/tmp/b'], min_cache_mem_size=0)
    self.assertEqual(svc.n_process, 8)
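# --- Illustrative note, not part of the original suite ---
# A reading of the n_process assertions above (inferred from this test alone,
# not from WorkerService documentation): the worker appears to run
#     n_cpu_process + n_net_process + 1
# processes, plus one shared spill I/O process whenever any spill directory
# is configured, regardless of how many directories there are:
#     4 + 2 + 1     = 7   # no spill dirs
#     4 + 2 + 1 + 1 = 8   # one or two spill dirs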
def testOperandActorWithAssignRetryAndFail(self, *_):
    arr = mt.random.randint(10, size=(10, 8), chunk_size=4)
    arr_add = mt.random.randint(10, size=(10, 8), chunk_size=4)
    arr2 = arr + arr_add

    def _allocate_raises(*_, **__):
        raise TimeoutError

    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())

    try:
        options.scheduler.retry_delay = 0
        with patch_method(AssignEvaluationActor._allocate_resource, new=_allocate_raises):
            self._run_operand_case(
                session_id, graph_key, arr2,
                lambda pool, uid: pool.create_actor(
                    FakeExecutionActor, fail_count=5, uid=uid))
    finally:
        options.scheduler.retry_delay = 60
def testSender(self):
    send_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr2 = 'localhost:%d' % get_next_port()

    options.worker.spill_directory = os.path.join(
        tempfile.gettempdir(), 'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())

    @contextlib.contextmanager
    def start_send_recv_pool():
        with start_transfer_test_pool(
                address=send_pool_addr, plasma_size=self.plasma_storage_size) as sp:
            sp.create_actor(SenderActor, uid=SenderActor.default_name())
            with start_transfer_test_pool(
                    address=recv_pool_addr, plasma_size=self.plasma_storage_size) as rp:
                rp.create_actor(MockReceiverActor, uid=ReceiverActor.default_name())
                yield sp, rp

    with start_send_recv_pool() as (send_pool, recv_pool):
        chunk_holder_ref = send_pool.actor_ref(ChunkHolderActor.default_name())
        sender_ref = send_pool.actor_ref(SenderActor.default_name())
        receiver_ref = recv_pool.actor_ref(ReceiverActor.default_name())

        sender_mapper_ref = send_pool.actor_ref(PlasmaKeyMapActor.default_name())
        store = PlasmaChunkStore(self._plasma_client, sender_mapper_ref)

        with self.run_actor_test(send_pool) as test_actor:
            # send when data missing
            sender_ref_p = test_actor.promise_ref(sender_ref)
            sender_ref_p.send_data(session_id, str(uuid.uuid4()), recv_pool_addr,
                                   _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            with self.assertRaises(DependencyMissing):
                self.get_result(5)

            # send data in spill
            write_spill_file(chunk_key1, mock_data)
            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            self.get_result(5)
            assert_array_equal(mock_data,
                               receiver_ref.get_result_data(session_id, chunk_key1))
            os.unlink(build_spill_file_name(chunk_key1))

            # send data in plasma store
            store.put(session_id, chunk_key1, mock_data)
            chunk_holder_ref.register_chunk(session_id, chunk_key1)
            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            self.get_result(5)
            assert_array_equal(mock_data,
                               receiver_ref.get_result_data(session_id, chunk_key1))

            # send data to multiple targets
            with start_transfer_test_pool(
                    address=recv_pool_addr2, plasma_size=self.plasma_storage_size) as rp2:
                recv_ref2 = rp2.create_actor(MockReceiverActor,
                                             uid=ReceiverActor.default_name())

                sender_ref_p.send_data(session_id, chunk_key1,
                                       [recv_pool_addr, recv_pool_addr2], _promise=True)
                # send data to already transferred / transferring
                sender_ref_p.send_data(session_id, chunk_key1,
                                       [recv_pool_addr, recv_pool_addr2], _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                self.get_result(5)
                assert_array_equal(mock_data,
                                   recv_ref2.get_result_data(session_id, chunk_key1))

            # send data to non-exist endpoint which causes error
            store.put(session_id, chunk_key2, mock_data)
            chunk_holder_ref.register_chunk(session_id, chunk_key2)
            sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            with self.assertRaises(BrokenPipeError):
                self.get_result(5)

            def mocked_receive_data_part(*_):
                raise ChecksumMismatch

            with patch_method(MockReceiverActor.receive_data_part,
                              new=mocked_receive_data_part):
                sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr,
                                       _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                with self.assertRaises(ChecksumMismatch):
                    self.get_result(5)
def testClientSpill(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            idx = 0

            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))

            def _fill_data():
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        shared_handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            idx = _fill_data()

            # test copying non-existing keys
            storage_client.copy_to(session_id, ['non-exist-key'],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(KeyError):
                self.get_result(5)

            # test copying into containing locations
            storage_client.copy_to(session_id, [data_keys[0]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[0]])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY)])

            # test unsuccessful copy when no data at target
            def _mock_load_from(*_, **__):
                return promise.finished(*build_exc_info(SystemError), _accept=False)

            with patch_method(StorageHandler.load_from, _mock_load_from), \
                    self.assertRaises(SystemError):
                storage_client.copy_to(session_id, [data_keys[0]],
                                       [DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test successful copy for multiple objects
            storage_client.delete(session_id, [data_keys[idx - 1]])
            ref_data = weakref.ref(data_list[idx])
            ref_data2 = weakref.ref(data_list[idx + 1])

            proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                     data_list[idx:idx + 2])
            data_list[idx:idx + 2] = [None, None]

            storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                   [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            proc_handler.delete(session_id, data_keys[idx:idx + 2])

            self.assertEqual(
                storage_manager_ref.get_data_locations(session_id, data_keys[idx:idx + 2]),
                [{(0, DataStorageDevice.SHARED_MEMORY)}, {(0, DataStorageDevice.DISK)}])
            self.assertIsNone(ref_data())
            self.assertIsNone(ref_data2())

            # test copy with spill
            idx += 2
            proc_handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])

            storage_client.copy_to(session_id, [data_keys[idx]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[idx]])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])
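# --- Illustrative note, not part of the original suite ---
# An interpretation of the assertions above: each location reported by
# get_data_locations appears to be a (proc_id, DataStorageDevice) tuple, so
# (0, DataStorageDevice.SHARED_MEMORY) reads as "shared memory attached to
# process 0", and copy_to moves data between such storage tiers, spilling to
# disk when the target tier is full.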
def testCpuCalcSingleFetches(self):
    import gc
    with self._start_calc_pool() as (_pool, test_actor):
        quota_ref = test_actor.promise_ref(MemQuotaActor.default_uid())
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(3)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)

        storage_client = test_actor.storage_client

        for fetch_chunk, d in zip(fetch_chunks, data_list):
            self.waitp(
                storage_client.put_objects(
                    session_id, [fetch_chunk.key], [d],
                    [DataStorageDevice.SHARED_MEMORY]),
            )
        self.assertEqual(
            list(storage_client.get_data_locations(
                session_id, [fetch_chunks[0].key])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])

        quota_batch = {
            build_quota_key(session_id, add_chunk.key, add_chunk.op.key):
                data_list[0].nbytes,
        }

        for idx in [1, 2]:
            quota_batch[build_quota_key(session_id, fetch_chunks[idx].key, add_chunk.op.key)] \
                = data_list[idx].nbytes

            self.waitp(
                storage_client.copy_to(session_id, [fetch_chunks[idx].key],
                                       [DataStorageDevice.DISK])
                    .then(lambda *_: storage_client.delete(
                        session_id, [fetch_chunks[idx].key],
                        [DataStorageDevice.SHARED_MEMORY])))
            self.assertEqual(
                list(storage_client.get_data_locations(
                    session_id, [fetch_chunks[idx].key])[0]),
                [(0, DataStorageDevice.DISK)])

        self.waitp(
            quota_ref.request_batch_quota(quota_batch, _promise=True),
        )

        o_create = PlasmaSharedStore.create

        def _mock_plasma_create(store, session_id, data_key, size):
            if data_key == fetch_chunks[2].key:
                raise StorageFull
            return o_create(store, session_id, data_key, size)

        id_type_set = set()

        def _extract_value_ref(*_):
            inproc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            obj = inproc_handler.get_objects(session_id, [add_chunk.key])[0]
            id_type_set.add((id(obj), type(obj)))
            del obj

        with patch_method(PlasmaSharedStore.create, _mock_plasma_create):
            self.waitp(
                calc_ref.calc(session_id, add_chunk.op.key, serialize_graph(exec_graph),
                              [add_chunk.key], _promise=True)
                    .then(_extract_value_ref)
                    .then(lambda *_: calc_ref.store_results(
                        session_id, add_chunk.op.key, [add_chunk.key], None,
                        _promise=True)))

        self.assertTrue(all((id(obj), type(obj)) not in id_type_set
                            for obj in gc.get_objects()))

        quota_dump = quota_ref.dump_data()
        self.assertEqual(len(quota_dump.allocations), 0)
        self.assertEqual(len(quota_dump.requests), 0)
        self.assertEqual(len(quota_dump.proc_sizes), 0)
        self.assertEqual(len(quota_dump.hold_sizes), 0)

        self.assertEqual(
            sorted(storage_client.get_data_locations(
                session_id, [fetch_chunks[0].key])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        self.assertEqual(
            sorted(storage_client.get_data_locations(
                session_id, [fetch_chunks[1].key])[0]),
            [(0, DataStorageDevice.DISK)])
        self.assertEqual(
            sorted(storage_client.get_data_locations(
                session_id, [fetch_chunks[2].key])[0]),
            [(0, DataStorageDevice.DISK)])
        self.assertEqual(
            sorted(storage_client.get_data_locations(
                session_id, [add_chunk.key])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
def testReceiver(self):
    pool_addr = 'localhost:%d' % get_next_port()
    options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_test_receiver_')
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    serialized_arrow_data = dataserializer.serialize(mock_data)
    data_size = serialized_arrow_data.total_bytes
    serialized_mock_data = serialized_arrow_data.to_buffer()
    serialized_crc32 = zlib.crc32(serialized_arrow_data.to_buffer())

    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())
    chunk_key3 = str(uuid.uuid4())
    chunk_key4 = str(uuid.uuid4())
    chunk_key5 = str(uuid.uuid4())
    chunk_key6 = str(uuid.uuid4())
    chunk_key7 = str(uuid.uuid4())
    chunk_key8 = str(uuid.uuid4())

    with start_transfer_test_pool(address=pool_addr,
                                  plasma_size=self.plasma_storage_size) as pool:
        receiver_ref = pool.create_actor(ReceiverActor, uid=str(uuid.uuid4()))

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            # check_status on receiving and received
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                             ReceiveStatus.NOT_STARTED)

            self.waitp(
                storage_client.create_writer(
                    session_id, chunk_key1, serialized_arrow_data.total_bytes,
                    [DataStorageDevice.DISK])
                    .then(lambda writer: promise.finished()
                          .then(lambda *_: writer.write(serialized_arrow_data))
                          .then(lambda *_: writer.close())))
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                             ReceiveStatus.RECEIVED)
            storage_client.delete(session_id, chunk_key1)

            self.waitp(
                storage_client.put_object(
                    session_id, chunk_key1, mock_data,
                    [DataStorageDevice.SHARED_MEMORY]))
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                             ReceiveStatus.RECEIVED)

            receiver_ref_p = test_actor.promise_ref(receiver_ref)

            # cancel on an un-run / missing result will result in nothing
            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # start creating writer
            receiver_ref_p.create_data_writer(session_id, chunk_key1, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVED))

            result = receiver_ref_p.create_data_writer(session_id, chunk_key1, data_size,
                                                       test_actor, use_promise=False)
            self.assertTupleEqual(result, (receiver_ref.address, ReceiveStatus.RECEIVED))

            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            result = receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                                       test_actor, use_promise=False)
            self.assertTupleEqual(result, (receiver_ref.address, ReceiveStatus.RECEIVING))

            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVING))

            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                             ReceiveStatus.NOT_STARTED)

            # test checksum error on receive_data_part
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.get_result(5)

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            receiver_ref_p.receive_data_part(session_id, chunk_key2, serialized_mock_data, 0)
            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            # test checksum error on finish_receive
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data, serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key2, 0)

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # test intermediate cancellation
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data[64:], serialized_crc32)
            with self.assertRaises(ExecutionInterrupted):
                self.get_result(5)

            # test transfer in memory
            receiver_ref_p.register_finish_callback(session_id, chunk_key3, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.receive_data_part(session_id, chunk_key3,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.receive_data_part(session_id, chunk_key3,
                                             serialized_mock_data[64:], serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key3, serialized_crc32)

            self.assertTupleEqual((), self.get_result(5))

            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVED))

            # test transfer in spill file
            def mocked_store_create(*_):
                raise StorageFull

            with patch_method(PlasmaSharedStore.create, new=mocked_store_create):
                with self.assertRaises(StorageFull):
                    receiver_ref_p.create_data_writer(session_id, chunk_key4, data_size,
                                                      test_actor, ensure_cached=True,
                                                      use_promise=False)

                # test receive aborted
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                receiver_ref_p.receive_data_part(session_id, chunk_key4,
                                                 serialized_mock_data[:64],
                                                 zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.cancel_receive(session_id, chunk_key4)
                with self.assertRaises(ExecutionInterrupted):
                    self.get_result(5)

                # test receive into spill
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                receiver_ref_p.receive_data_part(session_id, chunk_key4,
                                                 serialized_mock_data, serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key4, serialized_crc32)
                self.assertTupleEqual((), self.get_result(5))

            # test intermediate error
            def mocked_store_create(*_):
                raise SpillNotConfigured

            with patch_method(PlasmaSharedStore.create, new=mocked_store_create):
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key5, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s),
                          lambda *s: test_actor.set_result(s, accept=False))
                with self.assertRaises(SpillNotConfigured):
                    self.get_result(5)

            # test receive timeout
            receiver_ref_p.register_finish_callback(session_id, chunk_key6, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key6, data_size,
                                              test_actor, timeout=2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))
            receiver_ref_p.receive_data_part(session_id, chunk_key6,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            with self.assertRaises(TimeoutError):
                self.get_result(5)

            # test sender halt
            receiver_ref_p.register_finish_callback(session_id, chunk_key7, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            mock_ref = pool.actor_ref(test_actor.uid, address='MOCK_ADDR')
            receiver_ref_p.create_data_writer(
                session_id, chunk_key7, data_size, mock_ref, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))
            receiver_ref_p.receive_data_part(session_id, chunk_key7,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.notify_dead_senders(['MOCK_ADDR'])
            with self.assertRaises(WorkerDead):
                self.get_result(5)

            # test checksum error on finish_receive
            result = receiver_ref_p.create_data_writer(session_id, chunk_key8, data_size,
                                                       test_actor, use_promise=False)
            self.assertTupleEqual(result, (receiver_ref.address, None))
            receiver_ref_p.receive_data_part(session_id, chunk_key8,
                                             serialized_mock_data, serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key8, 0)
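# --- Illustrative sketch, not part of the original suite ---
# An interpretation of the receiver protocol exercised above: one complete
# transfer is a writer creation, one or more checksummed data parts, then a
# final checksum confirmation. The helper name _push_chunk is ours; every
# receiver_ref_p call appears in the test above (promise resolution elided).
def _push_chunk(receiver_ref_p, session_id, chunk_key, payload, sender_ref):
    crc = zlib.crc32(payload)
    receiver_ref_p.create_data_writer(session_id, chunk_key, len(payload),
                                      sender_ref, _promise=True)
    receiver_ref_p.receive_data_part(session_id, chunk_key, payload, crc)
    receiver_ref_p.finish_receive(session_id, chunk_key, crc)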
def testSender(self):
    send_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr = 'localhost:%d' % get_next_port()
    recv_pool_addr2 = 'localhost:%d' % get_next_port()

    options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_test_sender_')
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())

    @contextlib.contextmanager
    def start_send_recv_pool():
        with start_transfer_test_pool(
                address=send_pool_addr, plasma_size=self.plasma_storage_size) as sp:
            sp.create_actor(SenderActor, uid=SenderActor.default_uid())
            with start_transfer_test_pool(
                    address=recv_pool_addr, plasma_size=self.plasma_storage_size) as rp:
                rp.create_actor(MockReceiverActor, uid=ReceiverActor.default_uid())
                yield sp, rp

    with start_send_recv_pool() as (send_pool, recv_pool):
        sender_ref = send_pool.actor_ref(SenderActor.default_uid())
        receiver_ref = recv_pool.actor_ref(ReceiverActor.default_uid())

        with self.run_actor_test(send_pool) as test_actor:
            storage_client = test_actor.storage_client

            # send when data missing
            sender_ref_p = test_actor.promise_ref(sender_ref)
            sender_ref_p.send_data(session_id, str(uuid.uuid4()), recv_pool_addr,
                                   _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(DependencyMissing):
                self.get_result(5)

            # send data in spill
            serialized = dataserializer.serialize(mock_data)
            self.waitp(
                storage_client.create_writer(
                    session_id, chunk_key1, serialized.total_bytes,
                    [DataStorageDevice.DISK])
                    .then(lambda writer: promise.finished()
                          .then(lambda *_: writer.write(serialized))
                          .then(lambda *_: writer.close())))

            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            assert_array_equal(mock_data,
                               receiver_ref.get_result_data(session_id, chunk_key1))
            storage_client.delete(session_id, chunk_key1)

            # send data in plasma store
            self.waitp(
                storage_client.put_object(
                    session_id, chunk_key1, mock_data,
                    [DataStorageDevice.SHARED_MEMORY]))

            sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            assert_array_equal(mock_data,
                               receiver_ref.get_result_data(session_id, chunk_key1))

            # send data to multiple targets
            with start_transfer_test_pool(
                    address=recv_pool_addr2, plasma_size=self.plasma_storage_size) as rp2:
                recv_ref2 = rp2.create_actor(MockReceiverActor,
                                             uid=ReceiverActor.default_uid())

                self.waitp(
                    sender_ref_p.send_data(session_id, chunk_key1,
                                           [recv_pool_addr, recv_pool_addr2],
                                           _promise=True))
                # send data to already transferred / transferring
                sender_ref_p.send_data(session_id, chunk_key1,
                                       [recv_pool_addr, recv_pool_addr2], _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                assert_array_equal(mock_data,
                                   recv_ref2.get_result_data(session_id, chunk_key1))

            # send data to non-exist endpoint which causes error
            self.waitp(
                storage_client.put_object(
                    session_id, chunk_key2, mock_data,
                    [DataStorageDevice.SHARED_MEMORY]))
            sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(BrokenPipeError):
                self.get_result(5)

            def mocked_receive_data_part(*_):
                raise ChecksumMismatch

            with patch_method(MockReceiverActor.receive_data_part,
                              new=mocked_receive_data_part):
                sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr,
                                       _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(ChecksumMismatch):
                    self.get_result(5)
def testReceiverWorker(self):
    pool_addr = f'localhost:{get_next_port()}'
    options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_test_receiver_')
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    serialized_arrow_data = dataserializer.serialize(mock_data)
    data_size = serialized_arrow_data.total_bytes
    dumped_mock_data = dataserializer.dumps(mock_data)

    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())
    chunk_key3 = str(uuid.uuid4())
    chunk_key4 = str(uuid.uuid4())
    chunk_key5 = str(uuid.uuid4())
    chunk_key6 = str(uuid.uuid4())
    chunk_key7 = str(uuid.uuid4())
    chunk_key8 = str(uuid.uuid4())
    chunk_key9 = str(uuid.uuid4())

    with start_transfer_test_pool(address=pool_addr,
                                  plasma_size=self.plasma_storage_size) as pool, \
            self.run_actor_test(pool) as test_actor:
        storage_client = test_actor.storage_client

        receiver_ref = test_actor.promise_ref(
            pool.create_actor(ReceiverWorkerActor, uid=str(uuid.uuid4())))
        receiver_manager_ref = test_actor.promise_ref(
            ReceiverManagerActor.default_uid())

        # SCENARIO 1: create two writers and write with chunks
        self.waitp(
            receiver_ref.create_data_writers(
                session_id, [chunk_key1, chunk_key2], [data_size] * 2,
                test_actor, _promise=True))
        receiver_ref.receive_data_part(
            session_id, [chunk_key1, chunk_key2], [True, False],
            dumped_mock_data, dumped_mock_data[:len(dumped_mock_data) // 2])
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.RECEIVED)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                         ReceiveStatus.RECEIVING)
        receiver_ref.receive_data_part(
            session_id, [chunk_key2], [True],
            dumped_mock_data[len(dumped_mock_data) // 2:])
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                         ReceiveStatus.RECEIVED)
        assert_array_equal(
            storage_client.get_object(session_id, chunk_key1,
                                      [DataStorageDevice.SHARED_MEMORY],
                                      _promise=False),
            mock_data)
        assert_array_equal(
            storage_client.get_object(session_id, chunk_key2,
                                      [DataStorageDevice.SHARED_MEMORY],
                                      _promise=False),
            mock_data)

        # SCENARIO 2: one of the writers failed to create,
        # will test both existing and non-existing keys
        old_create_writer = StorageClient.create_writer

        def _create_writer_with_fail(self, session_id, chunk_key, *args, **kwargs):
            if chunk_key == fail_key:
                if kwargs.get('_promise', True):
                    return promise.finished(*build_exc_info(ValueError),
                                            **dict(_accept=False))
                else:
                    raise ValueError
            return old_create_writer(self, session_id, chunk_key, *args, **kwargs)

        with patch_method(StorageClient.create_writer, new=_create_writer_with_fail), \
                self.assertRaises(ValueError):
            fail_key = chunk_key4
            self.waitp(
                receiver_ref.create_data_writers(
                    session_id, [chunk_key3, chunk_key4, chunk_key5],
                    [data_size] * 3, test_actor, ensure_cached=False, _promise=True))
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key3),
                         ReceiveStatus.NOT_STARTED)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key4),
                         ReceiveStatus.NOT_STARTED)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key5),
                         ReceiveStatus.NOT_STARTED)

        with patch_method(StorageClient.create_writer, new=_create_writer_with_fail):
            fail_key = chunk_key2
            self.waitp(
                receiver_ref.create_data_writers(
                    session_id, [chunk_key2, chunk_key3], [data_size] * 2,
                    test_actor, ensure_cached=False, _promise=True))

        # SCENARIO 3: transfer timeout
        receiver_manager_ref.register_pending_keys(session_id, [chunk_key6])
        self.waitp(
            receiver_ref.create_data_writers(session_id, [chunk_key6], [data_size],
                                             test_actor, timeout=1, _promise=True))
        with self.assertRaises(TimeoutError):
            self.waitp(
                receiver_manager_ref.add_keys_callback(session_id, [chunk_key6],
                                                       _promise=True))

        # SCENARIO 4: cancelled transfer (both before and during transfer)
        receiver_manager_ref.register_pending_keys(session_id, [chunk_key7])
        self.waitp(
            receiver_ref.create_data_writers(session_id, [chunk_key7], [data_size],
                                             test_actor, timeout=1, _promise=True))
        receiver_ref.cancel_receive(session_id, [chunk_key2, chunk_key7])
        with self.assertRaises(KeyError):
            receiver_ref.receive_data_part(
                session_id, [chunk_key7], [False],
                dumped_mock_data[:len(dumped_mock_data) // 2])
        with self.assertRaises(KeyError):
            self.waitp(
                receiver_manager_ref.add_keys_callback(session_id, [chunk_key7],
                                                       _promise=True))

        # SCENARIO 5: sender halt and receiver is notified
        # (reusing previous unsuccessful key)
        receiver_manager_ref.register_pending_keys(session_id, [chunk_key7])
        mock_ref = pool.actor_ref(test_actor.uid, address='MOCK_ADDR')
        self.waitp(
            receiver_ref.create_data_writers(session_id, [chunk_key7], [data_size],
                                             mock_ref, timeout=1, _promise=True))
        receiver_ref.notify_dead_senders(['MOCK_ADDR'])
        with self.assertRaises(WorkerDead):
            self.waitp(
                receiver_manager_ref.add_keys_callback(session_id, [chunk_key7],
                                                       _promise=True))

        # SCENARIO 6: successful transfer without promise
        receiver_ref.create_data_writers(session_id, [chunk_key8], [data_size],
                                         mock_ref, use_promise=False)
        receiver_ref.receive_data_part(session_id, [chunk_key8], [True],
                                       dumped_mock_data)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key8),
                         ReceiveStatus.RECEIVED)
        assert_array_equal(
            storage_client.get_object(session_id, chunk_key8,
                                      [DataStorageDevice.SHARED_MEMORY],
                                      _promise=False),
            mock_data)

        # SCENARIO 7: failed transfer without promise
        with patch_method(StorageClient.create_writer, new=_create_writer_with_fail), \
                self.assertRaises(ValueError):
            fail_key = chunk_key9
            receiver_ref.create_data_writers(session_id, [chunk_key9], [data_size],
                                             mock_ref, use_promise=False)
def testReceiver(self):
    pool_addr = 'localhost:%d' % get_next_port()
    options.worker.spill_directory = os.path.join(
        tempfile.gettempdir(), 'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))
    session_id = str(uuid.uuid4())

    mock_data = np.array([1, 2, 3, 4])
    serialized_mock_data = dataserializer.dumps(mock_data)
    serialized_crc32 = zlib.crc32(serialized_mock_data)

    chunk_key1 = str(uuid.uuid4())
    chunk_key2 = str(uuid.uuid4())
    chunk_key3 = str(uuid.uuid4())
    chunk_key4 = str(uuid.uuid4())
    chunk_key5 = str(uuid.uuid4())
    chunk_key6 = str(uuid.uuid4())

    with start_transfer_test_pool(address=pool_addr,
                                  plasma_size=self.plasma_storage_size) as pool:
        chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
        mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
        receiver_ref = pool.create_actor(ReceiverActor, uid=str(uuid.uuid4()))

        store = PlasmaChunkStore(self._plasma_client, mapper_ref)

        # check_status on receiving and received
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.NOT_STARTED)

        write_spill_file(chunk_key1, mock_data)
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.RECEIVED)
        os.unlink(build_spill_file_name(chunk_key1))

        ref = store.put(session_id, chunk_key1, mock_data)
        data_size = store.get_actual_size(session_id, chunk_key1)
        chunk_holder_ref.register_chunk(session_id, chunk_key1)
        del ref
        self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                         ReceiveStatus.RECEIVED)

        with self.run_actor_test(pool) as test_actor:
            receiver_ref_p = test_actor.promise_ref(receiver_ref)

            # cancel on an un-run / missing result will result in nothing
            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # start creating writer
            receiver_ref_p.create_data_writer(session_id, chunk_key1, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVED))

            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVING))

            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                             ReceiveStatus.NOT_STARTED)

            # test checksum error on receive_data_part
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data, 0)

            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            # test checksum error on finish_receive
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data, serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key2, 0)

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            with self.assertRaises(ChecksumMismatch):
                self.get_result(5)

            receiver_ref_p.cancel_receive(session_id, chunk_key2)

            # test intermediate cancellation
            receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.cancel_receive(session_id, chunk_key2)
            receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                             serialized_mock_data[64:], serialized_crc32)
            with self.assertRaises(ExecutionInterrupted):
                self.get_result(5)

            # test transfer in memory
            receiver_ref_p.register_finish_callback(session_id, chunk_key3, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

            receiver_ref_p.receive_data_part(session_id, chunk_key3,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            receiver_ref_p.receive_data_part(session_id, chunk_key3,
                                             serialized_mock_data[64:], serialized_crc32)
            receiver_ref_p.finish_receive(session_id, chunk_key3, serialized_crc32)

            self.assertTupleEqual((), self.get_result(5))

            receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size,
                                              test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5),
                                  (receiver_ref.address, ReceiveStatus.RECEIVED))

            # test transfer in spill file
            def mocked_store_create(*_):
                raise StoreFull

            with patch_method(PlasmaChunkStore.create, new=mocked_store_create):
                # test receive aborted
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                receiver_ref_p.receive_data_part(session_id, chunk_key4,
                                                 serialized_mock_data[:64],
                                                 zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.cancel_receive(session_id, chunk_key4)
                with self.assertRaises(ExecutionInterrupted):
                    self.get_result(5)

                # test receive into spill
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key4, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False))
                self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))
                receiver_ref_p.receive_data_part(session_id, chunk_key4,
                                                 serialized_mock_data, serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key4, serialized_crc32)
                self.assertTupleEqual((), self.get_result(5))

            # test intermediate error
            def mocked_store_create(*_):
                raise SpillNotConfigured

            with patch_method(PlasmaChunkStore.create, new=mocked_store_create):
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key5, data_size, test_actor,
                    ensure_cached=False, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s, destroy=False),
                          lambda *s: test_actor.set_result(s, accept=False, destroy=False))
                with self.assertRaises(SpillNotConfigured):
                    self.get_result(5)

            # test receive timeout
            receiver_ref_p.register_finish_callback(session_id, chunk_key6, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False, destroy=False))

            receiver_ref_p.create_data_writer(session_id, chunk_key6, data_size,
                                              test_actor, timeout=2, _promise=True) \
                .then(lambda *s: test_actor.set_result(s, destroy=False))
            self.assertTupleEqual(self.get_result(5), (receiver_ref.address, None))
            receiver_ref_p.receive_data_part(session_id, chunk_key6,
                                             serialized_mock_data[:64],
                                             zlib.crc32(serialized_mock_data[:64]))
            with self.assertRaises(TimeoutError):
                self.get_result(5)
def testPrepareQuota(self, *_):
    pinned = True

    orig_pin = SharedHolderActor.pin_data_keys

    def _mock_pin(self, session_id, chunk_keys, token):
        from mars.errors import PinDataKeyFailed
        if pinned:
            raise PinDataKeyFailed
        return orig_pin(self, session_id, chunk_keys, token)

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
            create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)
        pool.actor_ref(WorkerClusterInfoActor.default_uid())

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        arr_add = get_tiled(arr_add)
        result_tensor = get_tiled(result_tensor)

        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)
        metas = {modified_chunk.key: WorkerMeta(
            mock_data.nbytes, mock_data.shape,
            ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))}
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            start_time = time.time()

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

            execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                .then(lambda *_: test_actor.set_result(time.time())) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

            def _delay_fun():
                nonlocal pinned
                time.sleep(0.5)
                pinned = False

            threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
def testQuota(self):
    def _raiser(*_, **__):
        raise ValueError

    local_pool_addr = 'localhost:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool:
        pool.create_actor(WorkerClusterInfoActor, [local_pool_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, local_pool_addr,
                          uid=StatusActor.default_uid())

        quota_ref = pool.create_actor(QuotaActor, 300, uid=QuotaActor.default_uid())

        quota_ref.process_quota('non_exist')
        quota_ref.hold_quota('non_exist')
        quota_ref.release_quota('non_exist')

        with self.assertRaises(ValueError):
            quota_ref.request_quota('ERROR', 1000)

        self.assertTrue(quota_ref.request_quota('0', 100))
        self.assertTrue(quota_ref.request_quota('0', 50))
        self.assertTrue(quota_ref.request_quota('0', 200))

        quota_ref.process_quota('0')
        self.assertIn('0', quota_ref.dump_data().proc_sizes)
        quota_ref.alter_allocation('0', 190, new_key=('0', 0))
        self.assertEqual(quota_ref.dump_data().allocations[('0', 0)], 190)

        quota_ref.hold_quota(('0', 0))
        self.assertIn(('0', 0), quota_ref.dump_data().hold_sizes)
        quota_ref.alter_allocation(('0', 0), new_key=('0', 1))
        self.assertEqual(quota_ref.dump_data().allocations[('0', 1)], 190)

        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(QuotaActor.default_uid())

            ref.request_quota('1', 150, _promise=True) \
                .then(lambda *_: test_actor.set_result(True)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            pool.sleep(0.5)

            self.assertFalse(quota_ref.request_quota('2', 50))
            self.assertFalse(quota_ref.request_quota('3', 200))

            self.assertFalse(quota_ref.request_quota('3', 180))
            self.assertNotIn('2', quota_ref.dump_data().allocations)

            ref.cancel_requests(('1',), reject_exc=build_exc_info(OSError))
            with self.assertRaises(OSError):
                self.get_result(5)

            with patch_method(QuotaActor._request_quota, new=_raiser):
                ref.request_quota('err_raise', 1, _promise=True) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(ValueError):
                    self.get_result(5)

                ref.request_batch_quota({'err_raise': 1}, _promise=True) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(ValueError):
                    self.get_result(5)

        self.assertNotIn('1', quota_ref.dump_data().requests)
        self.assertIn('2', quota_ref.dump_data().allocations)
        self.assertNotIn('3', quota_ref.dump_data().allocations)

        quota_ref.release_quotas([('0', 1)])
        self.assertIn('3', quota_ref.dump_data().allocations)

        self.assertFalse(quota_ref.request_quota('4', 180))
        quota_ref.alter_allocations(['3'], [50])
        self.assertIn('4', quota_ref.dump_data().allocations)

        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(QuotaActor.default_uid())
            ref.request_quota('5', 50, _promise=True) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            with patch_method(QuotaActor.alter_allocation, new=_raiser):
                quota_ref.release_quota('2')

            with self.assertRaises(ValueError):
                self.get_result(5)
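# --- Illustrative sketch, not part of the original suite ---
# An interpretation of the quota lifecycle exercised above, using only the
# methods seen in this test (not an authoritative API description):
def _quota_lifecycle(quota_ref, key, size):
    quota_ref.request_quota(key, size)   # reserve budget against the total
    quota_ref.process_quota(key)         # mark the key as actively computing
    quota_ref.hold_quota(key)            # pin while the result stays alive
    quota_ref.release_quota(key)         # free the budget so waiters proceed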