def testDiskReadAndWritePacked(self, *_):
    """Round-trip packed (pre-compressed) bytes through the disk storage handler.

    For every compression type the handler supports, write serialized data
    through a promised bytes writer with ``packed=True``, verify the data is
    registered at ``(0, DISK)`` and the backing file exists, then read it back
    with a packed bytes reader and compare with the original array.
    """
    test_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())

        pool.create_actor(EventsActor, uid=EventsActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        session_id = str(uuid.uuid4())
        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.DISK))

        # NOTE: the loop variable IS the handler attribute — each iteration
        # switches the handler's compression type in place.
        for handler._compress in self._get_compress_types():
            data_key1 = str(uuid.uuid4())

            storage_client.delete(session_id, [data_key1])
            self.rm_spill_dirs()

            # packed payload: raw dumps with the compression currently set
            block_data1 = dataserializer.dumps(data1, compress=handler._compress)

            def _write_data(ser, writer):
                # write the packed bytes and report the backing file name
                with writer:
                    writer.write(ser)
                return writer.filename

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes,
                                        packed=True, _promise=True) \
                .then(functools.partial(_write_data, block_data1)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))

            file_name = self.get_result(5)
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])
            self.assertTrue(os.path.exists(file_name))

            def _read_data(reader):
                # read everything back and unpack into the original object
                with reader:
                    return dataserializer.loads(reader.read())

            handler.create_bytes_reader(session_id, data_key1, packed=True,
                                        _promise=True) \
                .then(_read_data) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)
def _start_calc_pool(self):
    """Fixture generator: build a pool with all calc-related actors.

    Yields ``(pool, test_actor)`` for the caller to drive; destroys the
    shared holder actor on exit so plasma resources are released.
    """
    mock_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, backend='gevent',
                          address=mock_addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [mock_addr],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [mock_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(StatusActor, mock_addr,
                          uid=StatusActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(WorkerDaemonActor,
                          uid=WorkerDaemonActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())
        pool.create_actor(IORunnerActor)
        # QuotaActor registered under MemQuotaActor's uid, 1 MiB quota
        pool.create_actor(QuotaActor, 1024 ** 2,
                          uid=MemQuotaActor.default_uid())
        shared_holder_ref = pool.create_actor(
            SharedHolderActor, uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(CpuCalcActor, uid=CpuCalcActor.default_uid())

        with self.run_actor_test(pool) as test_actor:
            try:
                yield pool, test_actor
            finally:
                shared_holder_ref.destroy()
def testDaemon(self):
    """Daemon detects a killed actor process, notifies callbacks and recovers actors."""
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=2, backend='gevent',
                           distributor=MarsDistributor(2, 'w:0:'),
                           address=mock_scheduler_addr) as pool:
        daemon_ref = pool.create_actor(
            WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

        # sleeper lives in process 1 (uid prefix 'w:1:') so it can be killed
        # without taking down the test actor in process 0
        sleeper_ref = daemon_ref.create_actor(
            DaemonSleeperActor, uid='w:1:DaemonSleeperActor')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
        test_actor = pool.create_actor(DaemonTestActor)
        daemon_ref.register_actor_callback(
            test_actor, DaemonTestActor.handle_process_down_for_actors.__name__)

        test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
        self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.sleep(0.5)

        daemon_ref.kill_actor_process(sleeper_ref)
        # repeated kill shall not produce errors
        daemon_ref.kill_actor_process(sleeper_ref)
        self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.restart_process(1)
        daemon_ref.handle_process_down([1])
        pool.sleep(1)
        # the sleeper actor is recreated after restart...
        self.assertTrue(pool.has_actor(sleeper_ref))
        # ...and the in-flight call fails with WorkerProcessStopped
        with self.assertRaises(WorkerProcessStopped):
            test_actor.get_result()

        # after recovery, new calls succeed again
        test_actor.run_test_sleep(sleeper_ref, 1)
        pool.sleep(1.5)
        test_actor.get_result()
def testCalcProcessFailure(self):
    """Killing a calc process mid-execution surfaces WorkerProcessStopped
    to the waiter, and the CPU slot is available again after restart."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=2, backend='gevent', address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address, with_status=False)

        daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
        dispatch_ref = pool.actor_ref(DispatchActor.default_uid())
        # calc actor placed in process 1 so killing it spares the test actor
        calc_ref = daemon_ref.create_actor(
            MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')
        test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
        test_actor.run_simple_calc(session_id, _tell=True)

        # let the calc get underway before killing its process
        pool.sleep(2)
        proc_id = pool.distributor.distribute(calc_ref.uid)
        daemon_ref.kill_actor_process(calc_ref)
        assert not daemon_ref.is_actor_process_alive(calc_ref)
        pool.restart_process(proc_id)
        daemon_ref.handle_process_down([proc_id])

        with self.assertRaises(WorkerProcessStopped):
            self.wait_for_result(pool, test_actor)
        # the restarted process re-registers its single CPU slot
        self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
def testStopGraphCalc(self):
    """Stopping execution kills the calc process and the waiter sees
    ExecutionInterrupted after the process is restarted."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=2, backend='gevent', address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address, with_status=False)

        daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())

        # calc actor placed in process 1, away from the test actor
        calc_ref = daemon_ref.create_actor(
            MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')
        test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
        test_actor.run_simple_calc(session_id, _tell=True)

        pool.sleep(2)
        proc_id = pool.distributor.distribute(calc_ref.uid)
        execution_ref.stop_execution(
            session_id, test_actor.get_graph_key(), _tell=True)
        # stop_execution terminates the calc process; wait until it is gone
        while daemon_ref.is_actor_process_alive(calc_ref):
            pool.sleep(0.1)
        pool.restart_process(proc_id)
        daemon_ref.handle_process_down([proc_id])

        with self.assertRaises(ExecutionInterrupted):
            self.wait_for_result(pool, test_actor)
def testSharedLoad(self, *_):
    """Shared-memory handler loads data from bytes IO (disk) and object IO
    (proc memory), and the proc-memory copy is released after deletion."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

        # load from bytes io: first materialize data on disk...
        disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
        with disk_handler.create_bytes_writer(
                session_id, data_key1, ser_data1.total_bytes) as writer:
            ser_data1.write_to(writer)

        # ...then pull it into shared memory through the bytes-IO path
        handler.load_from_bytes_io(session_id, data_key1, disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key1)),
                         [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

        disk_handler.delete(session_id, data_key1)
        handler.delete(session_id, data_key1)

        # weakref lets us check the proc-memory object is truly released
        ref_data2 = weakref.ref(data2)

        # load from object io
        proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
        proc_handler.put_object(session_id, data_key2, data2)
        del data2

        handler.load_from_object_io(session_id, data_key2, proc_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key2)),
                         [(0, DataStorageDevice.PROC_MEMORY),
                          (0, DataStorageDevice.SHARED_MEMORY)])

        proc_handler.delete(session_id, data_key2)
        # no live references should remain after deletion
        self.assertIsNone(ref_data2())
        handler.delete(session_id, data_key2)
def testStorageManager(self):
    """Exercise register/unregister/query paths of StorageManagerActor,
    including cleanup of per-process entries on process-down."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        # unknown keys yield an empty location set
        self.assertEqual(list(manager_ref.get_data_locations(session_id, ['NON_EXIST'])[0]), [])

        # key1 registered in three locations; the last registration wins for size
        manager_ref.register_data(session_id, [data_key1],
                                  (0, DataStorageDevice.SHARED_MEMORY), [1024],
                                  shapes=[(16, 8)])
        manager_ref.register_data(session_id, [data_key1],
                                  (1, DataStorageDevice.PROC_MEMORY), [1024],
                                  shapes=[(16, 8)])
        manager_ref.register_data(session_id, [data_key1],
                                  (0, DataStorageDevice.DISK), [2048])
        self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                          (0, DataStorageDevice.DISK),
                          (1, DataStorageDevice.PROC_MEMORY)],
                         sorted(manager_ref.get_data_locations(session_id, [data_key1])[0]))
        self.assertEqual(2048, manager_ref.get_data_sizes(session_id, [data_key1])[0])
        self.assertEqual((16, 8), manager_ref.get_data_shapes(session_id, [data_key1])[0])

        manager_ref.register_data(session_id, [data_key2],
                                  (0, DataStorageDevice.SHARED_MEMORY), [1024])
        manager_ref.register_data(session_id, [data_key2],
                                  (1, DataStorageDevice.PROC_MEMORY), [1024])
        self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                          (1, DataStorageDevice.PROC_MEMORY)],
                         sorted(manager_ref.get_data_locations(session_id, [data_key2])[0]))

        # unregistering one location keeps the other alive
        manager_ref.unregister_data(session_id, [data_key2],
                                    (0, DataStorageDevice.SHARED_MEMORY))
        self.assertEqual([(1, DataStorageDevice.PROC_MEMORY)],
                         sorted(manager_ref.get_data_locations(session_id, [data_key2])[0]))
        self.assertEqual(1024, manager_ref.get_data_sizes(session_id, [data_key2])[0])

        # filter keys present at a specific location
        self.assertEqual([data_key1],
                         list(manager_ref.filter_exist_keys(
                             session_id, [data_key1, data_key2, 'non-exist'],
                             [(0, DataStorageDevice.SHARED_MEMORY)])))

        # unregistering the last location clears sizes and shapes as well
        manager_ref.unregister_data(session_id, [data_key2],
                                    (1, DataStorageDevice.PROC_MEMORY))
        self.assertEqual(list(manager_ref.get_data_locations(session_id, [data_key2])[0]), [])
        self.assertIsNone(manager_ref.get_data_sizes(session_id, [data_key2])[0])
        # NOTE(review): key passed without list wrapper here, unlike sibling
        # calls — confirm the API accepts both forms
        self.assertIsNone(manager_ref.get_data_shapes(session_id, data_key2)[0])

        # process-down drops every entry registered for that process id
        manager_ref.register_data(session_id, [data_key2],
                                  (1, DataStorageDevice.PROC_MEMORY), [1024])
        manager_ref.handle_process_down([1])
        self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                          (0, DataStorageDevice.DISK)],
                         sorted(manager_ref.get_data_locations(session_id, [data_key1])[0]))
        self.assertEqual(list(manager_ref.get_data_locations(session_id, [data_key2])[0]), [])
        self.assertIsNone(manager_ref.get_data_sizes(session_id, [data_key2])[0])
        self.assertIsNone(manager_ref.get_data_shapes(session_id, [data_key2])[0])
def testSharedPutAndGet(self, *_):
    """Put/get/delete through the shared-memory handler, with plain objects,
    pre-serialized objects and raw buffers."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)
        bytes_data2 = ser_data2.to_buffer()

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        # plain object round trip
        handler.put_objects(session_id, [data_key1], [data1])
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        assert_allclose(data1, handler.get_objects(session_id, [data_key1])[0])

        # delete unregisters the location and get raises afterwards
        handler.delete(session_id, [data_key1])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [])
        with self.assertRaises(KeyError):
            handler.get_objects(session_id, [data_key1])

        # put from a pre-serialized object
        handler.put_objects(session_id, [data_key2], [ser_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])

        # put from a raw serialized buffer
        handler.put_objects(session_id, [data_key2], [bytes_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])
def testProcMemPutAndGet(self):
    """Put/get/delete through the process-memory handler, with plain objects,
    pre-serialized objects and raw buffers."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)
        bytes_data2 = ser_data2.to_buffer()

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        # NOTE: handler fetched by device alone (no process index tuple) here
        handler = storage_client.get_storage_handler(
            DataStorageDevice.PROC_MEMORY)

        # plain object round trip
        handler.put_object(session_id, data_key1, data1)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(session_id, data_key1)),
            [(0, DataStorageDevice.PROC_MEMORY)])
        assert_allclose(data1, handler.get_object(session_id, data_key1))

        # delete clears the registration and get raises afterwards
        handler.delete(session_id, data_key1)
        self.assertIsNone(
            storage_manager_ref.get_data_locations(session_id, data_key1))
        with self.assertRaises(KeyError):
            handler.get_object(session_id, data_key1)

        # put from a pre-serialized object
        handler.put_object(session_id, data_key2, ser_data2, serialized=True)
        assert_allclose(data2, handler.get_object(session_id, data_key2))
        handler.delete(session_id, data_key2)

        # put from a raw serialized buffer
        handler.put_object(session_id, data_key2, bytes_data2, serialized=True)
        assert_allclose(data2, handler.get_object(session_id, data_key2))
        handler.delete(session_id, data_key2)
def testLoadStoreInOtherProcess(self):
    """Copy data between storage locations owned by different pool processes."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=3, address=test_addr,
                          distributor=MarsDistributor(3)) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        # one in-proc holder per worker process
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
        pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
        pool.create_actor(IORunnerActor, lock_free=True, dispatched=False,
                          uid=IORunnerActor.gen_uid(1))

        test_ref = pool.create_actor(OtherProcessTestActor,
                                     uid='w:0:OtherProcTest')

        def _get_result():
            # poll the test actor until it publishes a result or 10 s pass
            start_time = time.time()
            while test_ref.get_result() is None:
                pool.sleep(0.5)
                if time.time() - start_time > 10:
                    raise TimeoutError

        # shared memory (proc 0) -> proc memory (proc 1)
        test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                               (1, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()

        # proc memory (proc 1) -> shared memory (proc 0)
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (0, DataStorageDevice.SHARED_MEMORY), _tell=True)
        _get_result()

        # proc memory (proc 1) -> proc memory (proc 2)
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (2, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()
def testWorkerProcessRestart(self):
    """A killed CPU-slot actor process must be brought back by the worker daemon."""
    with self._start_worker_process() as (pool, worker_endpoint):
        daemon = pool.actor_ref(WorkerDaemonActor.default_uid(),
                                address=worker_endpoint)
        dispatcher = pool.actor_ref(DispatchActor.default_uid(),
                                    address=worker_endpoint)

        # pick the first registered CPU slot and kill its hosting process
        first_cpu_slot = dispatcher.get_slots('cpu')[0]
        victim_ref = pool.actor_ref(first_cpu_slot, address=worker_endpoint)
        daemon.kill_actor_process(victim_ref)

        # wait (up to 10 s) for the daemon to restart the process
        deadline = time.time() + 10
        while not daemon.is_actor_process_alive(victim_ref):
            gevent.sleep(0.1)
            if time.time() > deadline:
                raise TimeoutError('Check process restart timeout')
def testWorkerProcessRestart(self):
    """End-to-end check that a spawned worker restarts killed calc processes.

    Launches a real worker via ``python -m mars.worker`` against a mock
    scheduler pool, kills the worker's CPU-slot actor process and waits for
    the worker daemon to bring it back.  The subprocess and the plasma socket
    are always cleaned up in the ``finally`` block.
    """
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    # Fix: bind before the try block.  Previously ``proc`` was first assigned
    # inside ``try``; any failure before Popen (e.g. actor creation) made the
    # finally block raise NameError and mask the original exception.
    proc = None
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [mock_scheduler_addr],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())

            proc = subprocess.Popen([
                sys.executable, '-m', 'mars.worker',
                '-a', '127.0.0.1',
                '--schedulers', mock_scheduler_addr,
                '--cpu-procs', '1',
                '--cache-mem', '10m',
                '--spill-dir', self._spill_dir,
                '--ignore-avail-mem'])
            worker_endpoint = self._wait_worker_ready(proc, resource_ref)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid(),
                                        address=worker_endpoint)
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid(),
                                          address=worker_endpoint)

            cpu_slots = dispatch_ref.get_slots('cpu')
            calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint)
            daemon_ref.kill_actor_process(calc_ref)

            # wait (up to 10 s) for the daemon to restart the calc process
            check_start = time.time()
            while not daemon_ref.is_actor_process_alive(calc_ref):
                gevent.sleep(0.1)
                if time.time() - check_start > 10:
                    raise TimeoutError('Check process restart timeout')
    finally:
        if proc is not None:
            if proc.poll() is None:
                # ask the worker to shut down gracefully first
                proc.send_signal(signal.SIGINT)

            check_time = time.time()
            while True:
                time.sleep(0.1)
                if proc.poll() is not None or time.time() - check_time >= 5:
                    break

            if proc.poll() is None:
                # graceful shutdown timed out: force-kill
                proc.kill()

        if os.path.exists(options.worker.plasma_socket):
            os.unlink(options.worker.plasma_socket)
def testSharedLoadFromObjects(self, *_):
    """Shared-memory handler loads a key from the proc-memory handler via
    object IO; the source object is released once deleted."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        # load from object io
        # weakref so we can verify the object is freed after deletion
        ref_data1 = weakref.ref(data1)
        proc_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.PROC_MEMORY))
        proc_handler.put_objects(session_id, [data_key1], [data1])
        del data1

        handler.load_from_object_io(session_id, [data_key1], proc_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.PROC_MEMORY),
             (0, DataStorageDevice.SHARED_MEMORY)])

        proc_handler.delete(session_id, [data_key1])
        # the proc-memory copy must be truly released
        self.assertIsNone(ref_data1())
        handler.delete(session_id, [data_key1])
def _start_shared_holder_pool(self):
    """Fixture generator: build a pool with a shared holder and supporting
    actors, then yield ``(pool, test_actor)`` to the caller."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr,
                          uid=StatusActor.default_uid())

        pool.create_actor(WorkerDaemonActor,
                          uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        # shared holder sized by the test-level plasma storage size
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        yield pool, test_actor
def testCudaMemPutAndGet(self):
    """Put/get/delete through the CUDA handler for ndarray, Series and
    DataFrame payloads; requires cupy/cudf (GPU) to run."""
    test_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(CudaHolderActor)

        test_data = np.random.random((10, 10))
        # (payload, expected GPU type, GPU->host converter, comparator)
        test_suites = [
            (test_data, cp.ndarray, cp.asnumpy, assert_allclose),
            (pd.Series(test_data.flatten()), cudf.Series,
             lambda o: o.to_pandas(), pd.testing.assert_series_equal),
            (pd.DataFrame(dict(col=test_data.flatten())), cudf.DataFrame,
             lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
        ]

        for data, cuda_type, move_to_mem, assert_obj_equal in test_suites:
            ser_data = dataserializer.serialize(data)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.CUDA))

            # plain object round trip, returned as the GPU-side type
            handler.put_objects(session_id, [data_key1], [data])
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.CUDA)])
            self.assertIsInstance(
                handler.get_objects(session_id, [data_key1])[0], cuda_type)
            assert_obj_equal(data, move_to_mem(
                handler.get_objects(session_id, [data_key1])[0]))

            # delete clears the location and get raises afterwards
            handler.delete(session_id, [data_key1])
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key1])[0]),
                [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [data_key1])

            # put from pre-serialized data
            handler.put_objects(session_id, [data_key2], [ser_data],
                                serialize=True)
            self.assertIsInstance(
                handler.get_objects(session_id, [data_key2])[0], cuda_type)
            assert_obj_equal(data, move_to_mem(
                handler.get_objects(session_id, [data_key2])[0]))

            handler.delete(session_id, [data_key2])
def create_standard_actors(cls, pool, address, quota_size=None, with_daemon=True,
                           with_status=True, with_resource=False):
    """Populate *pool* with the standard actor set used by worker tests.

    Optional groups (resource, daemon, status) are toggled by the keyword
    flags; *quota_size* defaults to 1 MiB when falsy.
    """
    if not quota_size:
        quota_size = 1024 * 1024

    # cluster-wide info actors
    pool.create_actor(SchedulerClusterInfoActor, [address],
                      uid=SchedulerClusterInfoActor.default_uid())
    pool.create_actor(WorkerClusterInfoActor, [address],
                      uid=WorkerClusterInfoActor.default_uid())

    # storage bookkeeping
    pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
    pool.create_actor(StorageManagerActor,
                      uid=StorageManagerActor.default_uid())

    # optional actors, enabled per caller request
    if with_resource:
        pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
    if with_daemon:
        pool.create_actor(WorkerDaemonActor,
                          uid=WorkerDaemonActor.default_uid())
    if with_status:
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

    # shared storage, metadata, dispatch, quota and execution
    pool.create_actor(SharedHolderActor, cls.plasma_storage_size,
                      uid=SharedHolderActor.default_uid())
    pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
    pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
    pool.create_actor(QuotaActor, quota_size,
                      uid=MemQuotaActor.default_uid())
    pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
def testClientReadAndWrite(self):
    """Storage client writer/reader paths: plain and promised writers to disk,
    readers from existing and copy-needed locations, and file cleanup."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        options.worker.lock_free_fileio = True
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            # collected so we can later check the file is removed on delete
            file_names = []

            def _write_data(ser, writer):
                file_names.append(writer.filename)
                self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                with writer:
                    ser.write_to(writer)

            # test creating non-promised writer and write
            with storage_client.create_writer(
                    session_id, data_key1, ser_data1.total_bytes,
                    (DataStorageDevice.DISK, ), _promise=False) as writer:
                _write_data(ser_data1, writer)

            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key1])

            # test creating promised writer and write
            file_names[:] = []
            self.waitp(
                storage_client.create_writer(
                    session_id, data_key2, ser_data1.total_bytes,
                    (DataStorageDevice.DISK, ))
                .then(functools.partial(_write_data, ser_data1))
            )
            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key2])[0]),
                [(0, DataStorageDevice.DISK)])

            def _read_data(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            # test creating reader when data exist in location
            result = self.waitp(
                storage_client.create_reader(
                    session_id, data_key2, (DataStorageDevice.DISK, ))
                .then(_read_data)
            )[0]
            assert_allclose(result, data1)

            # test creating reader when no data in location (should raise)
            with self.assertRaises(IOError):
                storage_client.create_reader(
                    session_id, data_key2,
                    (DataStorageDevice.SHARED_MEMORY, ), _promise=False)

            # test creating reader when copy needed
            self.waitp(
                storage_client.create_reader(
                    session_id, data_key2,
                    (DataStorageDevice.SHARED_MEMORY, ))
                .then(_read_data)
            )
            # the copy adds a shared-memory location next to the disk one
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key2])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY),
                 (0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key2])
            # deletion is asynchronous: poll until the file disappears
            while os.path.exists(file_names[0]):
                test_actor.ctx.sleep(0.05)
            self.assertFalse(os.path.exists(file_names[0]))
def testClientSpill(self, *_):
    """``copy_to`` behaviour: missing keys, already-present targets, failed
    loads, multi-object copies and copies that force spill."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(QuotaActor, 1024 ** 2,
                          uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        # each block is ~1.25 MB of int16, enough to exhaust shared memory
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            idx = 0

            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))

            def _fill_data():
                # put objects into shared memory until it reports full;
                # returns the index of the first key NOT stored
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        shared_handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            idx = _fill_data()

            # test copying non-existing keys
            storage_client.copy_to(session_id, ['non-exist-key'],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(KeyError):
                self.get_result(5)

            # test copying into containing locations
            storage_client.copy_to(session_id, [data_keys[0]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[0]])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY)])

            # test unsuccessful copy when no data at target
            def _mock_load_from(*_, **__):
                return promise.finished(*build_exc_info(SystemError),
                                        _accept=False)

            with patch_method(StorageHandler.load_from, _mock_load_from), \
                    self.assertRaises(SystemError):
                storage_client.copy_to(session_id, [data_keys[0]],
                                       [DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test successful copy for multiple objects
            storage_client.delete(session_id, [data_keys[idx - 1]])
            # weakrefs verify the proc-memory copies are released afterwards
            ref_data = weakref.ref(data_list[idx])
            ref_data2 = weakref.ref(data_list[idx + 1])
            proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                     data_list[idx:idx + 2])
            data_list[idx:idx + 2] = [None, None]

            storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                   [DataStorageDevice.SHARED_MEMORY,
                                    DataStorageDevice.DISK]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            proc_handler.delete(session_id, data_keys[idx:idx + 2])

            # one object landed in shared memory, the other spilled to disk
            self.assertEqual(
                storage_manager_ref.get_data_locations(
                    session_id, data_keys[idx:idx + 2]),
                [{(0, DataStorageDevice.SHARED_MEMORY)},
                 {(0, DataStorageDevice.DISK)}])
            self.assertIsNone(ref_data())
            self.assertIsNone(ref_data2())

            # test copy with spill
            idx += 2
            proc_handler.put_objects(session_id, [data_keys[idx]],
                                     [data_list[idx]])

            storage_client.copy_to(session_id, [data_keys[idx]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[idx]])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])
def testClientPutAndGet(self):
    """Exercise batch put/get on the storage client.

    Puts more data than shared memory holds so objects spread over
    shared and proc memory, then checks synchronous and promise-based
    reads, and finally verifies deleted objects are actually released.
    """
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]
        data_dict = dict(zip(data_keys, data_list))

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            # check batch object put with size exceeds
            storage_client.put_objects(
                session_id, data_keys, data_list,
                [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            locations = storage_client.get_data_locations(session_id, data_keys)
            loc_to_keys = defaultdict(list)
            for key, location in zip(data_keys, locations):
                self.assertEqual(len(location), 1)
                # group keys by the device part of the (proc, device) tuple
                loc_to_keys[list(location)[0][-1]].append(key)
            # data must have overflowed into both devices
            self.assertGreater(len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
            self.assertGreater(len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

            # check get object with all cases
            with self.assertRaises(IOError):
                # non-promise get from a device that does not hold the key
                first_shared_key = loc_to_keys[DataStorageDevice.SHARED_MEMORY][0]
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY],
                                          _promise=False)

            shared_objs = storage_client.get_objects(
                session_id, [first_shared_key],
                [DataStorageDevice.SHARED_MEMORY], _promise=False)
            self.assertEqual(len(shared_objs), 1)
            assert_allclose(shared_objs[0], data_dict[first_shared_key])

            # promise-based get may load across devices
            storage_client.get_object(session_id, first_shared_key,
                                      [DataStorageDevice.PROC_MEMORY],
                                      _promise=True) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data_dict[first_shared_key])

            storage_client.delete(session_id, data_keys)
            time.sleep(0.5)
            # after delete, putting via a weakref then dropping all strong
            # references must leave the object collectable
            ref = weakref.ref(data_dict[data_keys[0]])
            storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                       [DataStorageDevice.SHARED_MEMORY])
            data_list[:] = []
            data_dict.clear()
            self.assertIsNone(ref())
def testSharedSpill(self, *_):
    """Exercise spill control on the shared-memory handler.

    Verifies that lifted keys survive a spill while older keys go to
    disk, that pinned keys cannot be spilled until unpinned, and that
    new puts succeed once space has been reclaimed.
    """
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        holder_ref = pool.create_actor(
            SharedHolderActor, self.plasma_storage_size,
            uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))
        idx = 0

        def _fill_data():
            # Fill shared memory until StorageFull; returns the index of
            # the first key that did not fit (reads `idx` at call time).
            i = 0
            for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                try:
                    handler.put_objects(session_id, [key], [data])
                except StorageFull:
                    break
            return i + idx

        def _do_spill():
            # Request a spill of twice one object's size and wait for it.
            data_size = storage_manager_ref.get_data_sizes(
                session_id, [data_keys[0]])[0]
            handler.spill_size(2 * data_size) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

        # test lift data key
        idx = _fill_data()
        handler.lift_data_keys(session_id, [data_keys[0]])
        _do_spill()

        # lifted key stays in shared memory; an unlifted one was spilled
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[0]])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[1]])[0]),
            [(0, DataStorageDevice.DISK)])

        # the reclaimed space accepts a fresh object
        handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[idx]])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        idx += 1

        # test pin data key
        idx = _fill_data()
        # move the key to the spill front so only the pin protects it
        holder_ref.lift_data_keys(session_id, [data_keys[0]], last=False)
        pin_token = str(uuid.uuid4())
        pinned_keys = handler.pin_data_keys(session_id, (data_keys[0],), pin_token)
        self.assertIn(data_keys[0], pinned_keys)
        _do_spill()

        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[0]])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[1]])[0]),
            [(0, DataStorageDevice.DISK)])

        handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[idx]])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        idx += 1

        # test unpin data key
        idx = _fill_data()
        handler.unpin_data_keys(session_id, (data_keys[0],), pin_token)
        _do_spill()

        # once unpinned, the key is eligible for spilling to disk
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(
                session_id, [data_keys[0]])[0]),
            [(0, DataStorageDevice.DISK)])
def testDiskReadAndWriteMerger(self):
    """Exercise disk reads/writes through the file merger.

    With a small ``max_file_size``, many objects are packed into a few
    merged files; the test writes 30 random arrays under merger lock,
    reads them back both packed and unpacked, and verifies the merged
    files are removed once all contained keys are deleted.
    """
    # NOTE(review): removed leftover debug code
    # (`import logging; logging.basicConfig(level=logging.DEBUG)`) that
    # mutated the process-wide logging configuration for the whole run.
    test_addr = f'127.0.0.1:{get_next_port()}'
    # force merging by keeping merged files tiny
    options.worker.filemerger.max_file_size = 2400
    options.worker.filemerger.concurrency = 16

    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())
        pool.create_actor(EventsActor, uid=EventsActor.default_uid())
        disk_file_merger_ref = pool.create_actor(
            DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        session_id = str(uuid.uuid4())
        data_count = 30
        data = [np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)]
        ser_data = [dataserializer.serialize(d) for d in data]

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.DISK))

        for handler._compress in self._get_compress_types():
            data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

            # write all objects concurrently, packed, under merger lock
            promises = []
            for idx in range(data_count):
                block_data = dataserializer.dumps(
                    data[idx], compress=handler._compress)

                def _write_data(ser, writer):
                    # `ser` is bound via functools.partial per iteration
                    with writer:
                        writer.write(ser)
                    return writer.filename

                promises.append(
                    handler.create_bytes_writer(
                        session_id, data_keys[idx], ser_data[idx].total_bytes,
                        packed=True, with_merger_lock=True, _promise=True)
                    .then(functools.partial(_write_data, block_data)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)

            for key in data_keys:
                self.assertEqual(
                    sorted(storage_manager_ref.get_data_locations(
                        session_id, [key])[0]),
                    [(0, DataStorageDevice.DISK)])
            dump_result = disk_file_merger_ref.dump_info()
            written_files = list(dump_result[2])
            for fn in written_files:
                self.assertTrue(os.path.exists(fn))

            # read back packed data under merger lock
            data_store = [None] * len(data)
            promises = []
            for idx in range(data_count):
                def _read_data(reader, idx):
                    # `idx` is bound via functools.partial to avoid the
                    # late-binding closure pitfall
                    with reader:
                        data_store[idx] = dataserializer.loads(reader.read())

                promises.append(
                    handler.create_bytes_reader(
                        session_id, data_keys[idx], with_merger_lock=True,
                        packed=True, _promise=True)
                    .then(functools.partial(_read_data, idx=idx)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)
            for true_data, read_data in zip(data, data_store):
                assert_allclose(true_data, read_data)

            # read back again without packing
            data_store = [None] * len(data)
            promises = []
            for idx in range(data_count):
                def _read_data(reader, idx):
                    with reader:
                        data_store[idx] = dataserializer.deserialize(
                            reader.read())

                promises.append(
                    handler.create_bytes_reader(
                        session_id, data_keys[idx], _promise=True)
                    .then(functools.partial(_read_data, idx=idx)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)
            for true_data, read_data in zip(data, data_store):
                assert_allclose(true_data, read_data)

            # deleting every key must eventually remove the merged files
            storage_client.delete(session_id, data_keys)
            pool.sleep(0.1)
            for fn in written_files:
                self.assertFalse(os.path.exists(fn))
def post_create(self): super(DaemonSleeperActor, self).__init__() self._daemon_ref = self.promise_ref(WorkerDaemonActor.default_uid()) self._daemon_ref.register_process(self.ref(), os.getpid(), _tell=True)
def testSharedReadAndWrite(self, *_):
    """Exercise byte-level reads/writes on the shared-memory handler.

    Writes one serialized array in a single shot and again in small
    chunks, then reads it back in one shot and in chunks, checking the
    round-tripped array each time.
    """
    test_addr = '127.0.0.1:%d' % get_next_port()
    # chunk size for the batched read/write passes
    io_size = dataserializer.HEADER_LENGTH * 2

    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((100, 100))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        def _write_data(ser, writer):
            # single-shot write of the whole serialized object
            self.assertEqual(writer.nbytes, ser_data1.total_bytes)
            with writer:
                ser.write_to(writer)

        handler.create_bytes_writer(session_id, data_key1,
                                    ser_data1.total_bytes, _promise=True) \
            .then(functools.partial(_write_data, ser_data1)) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])

        handler.delete(session_id, [data_key1])

        def _write_data(ser, writer):
            # chunked write: feed the buffer in io_size pieces
            with writer:
                for start in range(0, len(ser), io_size):
                    writer.write(ser[start:start + io_size])

        handler.create_bytes_writer(session_id, data_key1,
                                    ser_data1.total_bytes, _promise=True) \
            .then(functools.partial(_write_data, ser_data1.to_buffer())) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])

        def _read_data_all(reader):
            # single-shot read of the whole object
            with reader:
                return dataserializer.deserialize(reader.read())

        handler.create_bytes_reader(session_id, data_key1, _promise=True) \
            .then(_read_data_all) \
            .then(functools.partial(test_actor.set_result),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        assert_allclose(self.get_result(5), data1)

        def _read_data_batch(reader):
            # chunked read: accumulate io_size pieces until EOF
            bio = BytesIO()
            with reader:
                while True:
                    buf = reader.read(io_size)
                    if buf:
                        bio.write(buf)
                    else:
                        break
            return dataserializer.deserialize(bio.getvalue())

        handler.create_bytes_reader(session_id, data_key1, _promise=True) \
            .then(_read_data_batch) \
            .then(functools.partial(test_actor.set_result),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        assert_allclose(self.get_result(5), data1)
        handler.delete(session_id, [data_key1])
def testDiskReadAndWrite(self, *_):
    """Exercise byte-level reads/writes on the disk handler.

    For every supported compression type: write a file, verify that
    writing an existing key fails, rewrite after unregistering, read
    back, verify deletion removes the file, and round-trip a tuple key.
    """
    test_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)

        session_id = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.DISK))

        for handler._compress in self._get_compress_types():
            data_key1 = str(uuid.uuid4())
            # tuple keys are also valid data keys
            data_key2 = (str(uuid.uuid4()), 'subkey')

            # start each compression pass from a clean state
            storage_client.delete(session_id, [data_key1])
            storage_client.delete(session_id, [data_key2])
            self.rm_spill_dirs()

            def _write_data(ser, writer):
                self.assertEqual(writer.nbytes, ser.total_bytes)
                with writer:
                    ser.write_to(writer)
                return writer.filename

            def _read_data(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            # test normal file write
            handler.create_bytes_writer(session_id, data_key1,
                                        ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            file_name = self.get_result(5)
            self.assertTrue(os.path.exists(file_name))
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])

            # test write existing (this should produce an error)
            handler.create_bytes_writer(session_id, data_key1,
                                        ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(StorageDataExists):
                self.get_result(5)

            # test writing with unreferenced file
            storage_manager_ref.unregister_data(
                session_id, [data_key1], (0, DataStorageDevice.DISK))
            handler.create_bytes_writer(session_id, data_key1,
                                        ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            file_name = self.get_result(5)
            self.assertTrue(os.path.exists(file_name))
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])

            # test reading and verifying written data
            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)

            # test unregistering data
            handler.delete(session_id, [data_key1])
            # file removal is asynchronous; poll until it disappears
            while os.path.exists(file_name):
                test_actor.ctx.sleep(0.05)
            self.assertFalse(os.path.exists(file_name))

            # test reading and writing with tuple keys
            handler.create_bytes_writer(session_id, data_key2,
                                        ser_data2.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data2)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_key2])[0]),
                [(0, DataStorageDevice.DISK)])

            handler.create_bytes_reader(session_id, data_key2, _promise=True) \
                .then(_read_data) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data2)
def testSharedLoadFromBytes(self, *_):
    """Exercise ``load_from_bytes_io`` on the shared-memory handler.

    Loads one object from a disk-backed bytes reader into shared memory,
    then loads a batch that exceeds shared-memory capacity and checks
    the ``StorageFull`` error reports the keys that did not fit.
    """
    # NOTE(review): removed leftover debug code
    # (`import logging; logging.basicConfig(level=logging.DEBUG)`) that
    # mutated the process-wide logging configuration for the whole run.
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        # load from bytes io
        disk_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.DISK))
        with disk_handler.create_bytes_writer(
                session_id, data_key1, ser_data1.total_bytes) as writer:
            ser_data1.write_to(writer)

        handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        # after loading, the key is registered on both devices
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY),
             (0, DataStorageDevice.DISK)])

        disk_handler.delete(session_id, [data_key1])
        handler.delete(session_id, [data_key1])

        # load from bytes io till no capacity
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]
        for key, data in zip(data_keys, data_list):
            ser_data = dataserializer.serialize(data)
            with disk_handler.create_bytes_writer(
                    session_id, key, ser_data.total_bytes) as writer:
                ser_data.write_to(writer)

        handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))

        affected_keys = set()
        try:
            self.get_result(5)
        except StorageFull as ex:
            # StorageFull carries the keys that could not be loaded
            affected_keys.update(ex.affected_keys)
        storage_client.delete(session_id, data_keys,
                              [DataStorageDevice.DISK])
        # some but not all keys must have failed to fit
        self.assertLess(len(affected_keys), len(data_keys))
        self.assertGreater(len(affected_keys), 1)
        for k, size in zip(data_keys,
                           storage_client.get_data_sizes(session_id, data_keys)):
            if k in affected_keys:
                self.assertIsNone(size)
            else:
                self.assertIsNotNone(size)