def setUp(self):
    """Start one scheduler and one worker subprocess and wait until both are up.

    Stores the subprocess handles and the scheduler port on ``self`` so the
    matching tearDown (defined elsewhere) can terminate them, and swaps
    gevent's ``NOT_ERROR`` tuple so hub exceptions fail the test loudly.

    Fix: the actor-availability wait loop used a bare ``except:``, which also
    swallowed ``KeyboardInterrupt``/``SystemExit``; narrowed to ``Exception``.
    """
    scheduler_port = str(get_next_port())
    proc_worker = subprocess.Popen([
        sys.executable, '-m', 'mars.worker',
        '-a', '127.0.0.1',
        '--cpu-procs', '2',
        '--level', 'debug',
        '--cache-mem', '16m',
        '--schedulers', '127.0.0.1:' + scheduler_port,
        '--ignore-avail-mem',
    ])
    proc_scheduler = subprocess.Popen([
        sys.executable, '-m', 'mars.scheduler',
        '-H', '127.0.0.1',
        '--level', 'debug',
        '-p', scheduler_port,
        '--format', '%(asctime)-15s %(message)s',
    ])

    self.scheduler_port = scheduler_port
    self.proc_worker = proc_worker
    self.proc_scheduler = proc_scheduler

    time.sleep(2)
    actor_client = new_client()

    # Wait up to 10 seconds for the scheduler's ResourceActor to be reachable.
    check_time = time.time()
    while True:
        try:
            resource_ref = actor_client.actor_ref(
                ResourceActor.default_name(),
                address='127.0.0.1:' + scheduler_port)
            if actor_client.has_actor(resource_ref):
                break
            else:
                raise SystemError('Check meta_timestamp timeout')
        except Exception:  # narrowed from bare except: keep Ctrl-C working
            if time.time() - check_time > 10:
                raise
            time.sleep(1)

    # Wait up to 20 seconds for the worker to register with the scheduler.
    check_time = time.time()
    while True:
        if not resource_ref.get_worker_count():
            time.sleep(0.5)
            self.check_process_statuses()
            if time.time() - check_time > 20:
                raise SystemError('Check meta_timestamp timeout')
        else:
            break

    # Treat ordinary exceptions in the gevent hub as fatal during the test;
    # the saved tuple is restored from ``self.exceptions`` in tearDown.
    self.exceptions = gevent.hub.Hub.NOT_ERROR
    gevent.hub.Hub.NOT_ERROR = (Exception, )
def testLoadStoreInOtherProcess(self):
    """Exercise copying stored data between global and per-process storage."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=2, address=test_addr,
                          distributor=MarsDistributor(2)) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')
        pool.create_actor(IORunnerActor, lock_free=True, dispatched=False,
                          uid=IORunnerActor.gen_uid(1))

        test_ref = pool.create_actor(OtherProcessTestActor,
                                     uid='w:0:OtherProcTest')

        def wait_for_result():
            # poll until the test actor publishes a result, 10-second budget
            deadline = time.time() + 10
            while test_ref.get_result() is None:
                pool.sleep(0.5)
                if time.time() > deadline:
                    raise TimeoutError

        test_ref.run_copy_global_to_proc_test(_tell=True)
        wait_for_result()

        test_ref.run_copy_proc_to_global_test(_tell=True)
        wait_for_result()
async def actor_pool():
    """Yield an actor pool and the port of a WebActor serving TestAPIHandler."""
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')
    pool = await mo.create_actor_pool(
        '127.0.0.1', n_process=0, subprocess_start_method=start_method)
    async with pool:
        web_config = {
            'host': '127.0.0.1',
            'port': get_next_port(),
            'web_handlers': {
                TestAPIHandler.get_root_pattern(): TestAPIHandler,
            },
        }
        await mo.create_actor(WebActor, web_config,
                              address=pool.external_address)
        yield pool, web_config['port']
def testMemQuotaAllocation(self):
    """A quota request exceeding free system memory should block until memory
    frees up.

    ``resource.virtual_memory`` is patched so only 50 of 300 "bytes" are
    available at first; a request for 100 is made, availability is raised
    after ~0.5s, and the release callback must fire at least ~0.4s later.
    """
    from mars import resource
    from mars.utils import AttributeDict

    # fake system memory stats: only 50 available initially
    mock_mem_stat = AttributeDict(dict(total=300, available=50, used=0, free=50))
    local_pool_addr = 'localhost:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
            patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
        pool.create_actor(WorkerClusterInfoActor, schedulers=[local_pool_addr],
                          uid=WorkerClusterInfoActor.default_name())
        pool.create_actor(StatusActor, local_pool_addr,
                          uid=StatusActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_name())
        # refresh_time=0.1 makes the quota actor re-read the (mocked) memory
        # stats frequently, so it notices the bump below quickly
        quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1,
                                      uid=MemQuotaActor.default_name())

        time_recs = []
        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(quota_ref)
            time_recs.append(time.time())

            def actual_exec(x):
                # runs when the pending quota request is finally granted
                ref.release_quota(x)
                time_recs.append(time.time())
                test_actor.set_result(None)

            ref.request_quota('req', 100, _promise=True) \
                .then(functools.partial(actual_exec, 'req'))

            pool.sleep(0.5)
            # free up memory so the pending request can be granted
            mock_mem_stat['available'] = 150
            mock_mem_stat['free'] = 150
            self.get_result(2)

        # grant must have waited for the memory bump, not fired immediately
        self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
def testCudaMemPutAndGet(self):
    """Round-trip numpy/pandas data through the CUDA storage handler.

    Each suite entry is (host data, expected device-side type, converter back
    to host memory, matching equality assertion). Both the raw-object and the
    pre-serialized put paths are exercised, including delete + KeyError on
    re-read.
    """
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(CudaHolderActor)

        test_data = np.random.random((10, 10))
        test_suites = [
            (test_data, cp.ndarray, cp.asnumpy, assert_allclose),
            (pd.Series(test_data.flatten()), cudf.Series,
             lambda o: o.to_pandas(), pd.testing.assert_series_equal),
            (pd.DataFrame(dict(col=test_data.flatten())), cudf.DataFrame,
             lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
        ]

        for data, cuda_type, move_to_mem, assert_obj_equal in test_suites:
            ser_data = dataserializer.serialize(data)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.CUDA))

            # raw-object path: put, locate, read back on device, delete
            handler.put_objects(session_id, [data_key1], [data])
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                [(0, DataStorageDevice.CUDA)])
            self.assertIsInstance(handler.get_objects(session_id, [data_key1])[0],
                                  cuda_type)
            assert_obj_equal(data, move_to_mem(
                handler.get_objects(session_id, [data_key1])[0]))

            handler.delete(session_id, [data_key1])
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [data_key1])

            # pre-serialized path: put with serialize=True, read back, delete
            handler.put_objects(session_id, [data_key2], [ser_data], serialize=True)
            self.assertIsInstance(handler.get_objects(session_id, [data_key2])[0],
                                  cuda_type)
            assert_obj_equal(data, move_to_mem(
                handler.get_objects(session_id, [data_key2])[0]))
            handler.delete(session_id, [data_key2])
def testSendTargets(self):
    """Executing a graph with ``send_addresses`` should also push the result
    chunk to the given endpoint (received here by MockSenderActor)."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address,
                           distributor=WorkerDistributor(2)) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((4, ), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)
        result_key = result_tensor.chunks[0].key

        # the mock sender is primed with the value it expects to be asked to send
        pool.create_actor(MockSenderActor, mock_data + np.ones((4, )), 'out',
                          uid='w:mock_sender')
        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                # result chunk must be present in the chunk store afterwards
                data = test_actor._chunk_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(
                ExecutionActor.default_name())
            # enqueue -> start -> validate; any failure in the chain is routed
            # to set_result(..., False) so self.get_result() re-raises it
            execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None,
                                        send_addresses={result_key: (pool_address,)}, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()
def testExecuteWorker(self):
    """Spawn a real worker subprocess and run WorkerProcessTestActor on it.

    Fix: ``proc`` is now initialized before the ``try`` so the cleanup in
    ``finally`` cannot raise NameError (masking the original failure) when
    pool or actor creation fails before the subprocess is started.
    """
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    proc = None
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor,
                              schedulers=[mock_scheduler_addr],
                              uid=SchedulerClusterInfoActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())

            proc = subprocess.Popen([
                sys.executable, '-m', 'mars.worker',
                '-a', '127.0.0.1',
                '--schedulers', mock_scheduler_addr,
                '--cpu-procs', '1',
                '--cache-mem', '10m',
                '--spill-dir', self._spill_dir,
                '--ignore-avail-mem',
            ])
            worker_endpoint = self._wait_worker_ready(proc, resource_ref)

            test_ref = pool.create_actor(WorkerProcessTestActor)
            test_ref.run_test(worker_endpoint, _tell=True)

            # wait up to 20 seconds for the test actor to report back
            check_time = time.time()
            while not test_ref.get_reply():
                gevent.sleep(0.1)
                if time.time() - check_time > 20:
                    raise TimeoutError('Check reply timeout')
    finally:
        if proc is not None:
            if proc.poll() is None:
                # graceful stop first, then hard kill after a 5-second grace
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
        if os.path.exists(options.worker.plasma_socket):
            os.unlink(options.worker.plasma_socket)
def testFailoverMessage(self):
    """Dead-worker detection should notify graphs of lost chunks and keep the
    worker blacklisted until ``worker_blacklist_time`` elapses."""
    mock_session_id = str(uuid.uuid4())
    mock_graph_key = str(uuid.uuid4())
    mock_chunk_key = str(uuid.uuid4())
    addr = '127.0.0.1:%d' % get_next_port()
    mock_worker_addr = '127.0.0.1:54132'

    # short blacklist so the test can wait it out below
    options.scheduler.worker_blacklist_time = 0.5

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
        session_manager_ref = pool.create_actor(
            SessionManagerActor, uid=SessionManagerActor.default_name())
        resource_ref = pool.create_actor(ResourceActor,
                                         uid=ResourceActor.default_name())
        chunk_meta_ref = pool.create_actor(
            ChunkMetaActor, uid=ChunkMetaActor.default_name())

        session_ref = pool.actor_ref(
            session_manager_ref.create_session(mock_session_id))
        # register a chunk that lives only on the soon-to-die mock worker
        chunk_meta_ref.set_chunk_meta(mock_session_id, mock_chunk_key, size=80,
                                      shape=(10,), workers=(mock_worker_addr,))
        with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__,
                        new=MockGraphActor):
            session_ref.submit_tensor_graph(None, mock_graph_key)
            graph_ref = pool.actor_ref(
                GraphActor.gen_name(mock_session_id, mock_graph_key))

            # make the worker's heartbeat look expired, then run detection
            expire_time = time.time() - options.scheduler.status_timeout - 1
            resource_ref.set_worker_meta(mock_worker_addr,
                                         dict(update_time=expire_time))

            resource_ref.detect_dead_workers(_tell=True)
            pool.sleep(0.2)

            # the mocked graph actor records the worker-change notification
            _, removes, lost_chunks = graph_ref.get_worker_change_args()
            self.assertListEqual(removes, [mock_worker_addr])
            self.assertListEqual(lost_chunks, [mock_chunk_key])

            self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())
            # while blacklisted, re-registration must be ignored
            resource_ref.set_worker_meta(mock_worker_addr,
                                         dict(update_time=time.time()))
            self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())

            # after the 0.5s blacklist window, re-registration succeeds
            pool.sleep(0.4)
            resource_ref.set_worker_meta(mock_worker_addr,
                                         dict(update_time=time.time()))
            self.assertIn(mock_worker_addr, resource_ref.get_workers_meta())
def _start_worker_process(self, no_cuda=True, cuda_device=None):
    """Context generator: start a scheduler pool plus a worker subprocess and
    yield ``(pool, worker_endpoint)`` once the worker has registered.

    Fix: ``proc`` is initialized before the ``try`` so the cleanup code in
    ``finally`` cannot raise NameError (hiding the original error) when the
    pool or an actor fails to start before the subprocess is spawned.
    """
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    proc = None
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [mock_scheduler_addr],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())

            args = [
                sys.executable, '-m', 'mars.worker',
                '-a', '127.0.0.1',
                '--schedulers', mock_scheduler_addr,
                '--cpu-procs', '1',
                '--cache-mem', '10m',
                '--spill-dir', self._spill_dir,
                '--ignore-avail-mem',
            ]
            env = os.environ.copy()
            if no_cuda:
                args.append('--no-cuda')
            else:
                # restrict the worker to the requested CUDA device
                env['CUDA_VISIBLE_DEVICES'] = cuda_device
            proc = subprocess.Popen(args, env=env)
            worker_endpoint = self._wait_worker_ready(proc, resource_ref)

            yield pool, worker_endpoint
    finally:
        if proc is not None:
            if proc.poll() is None:
                # graceful stop first, then hard kill after a 5-second grace
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
        if os.path.exists(options.worker.plasma_socket):
            os.unlink(options.worker.plasma_socket)
def testReExecuteExisting(self):
    """Executing the same graph key twice must succeed both times; the second
    run goes through the already-executed path."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
        pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

        import mars.tensor as mt
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        result_tensor = get_tiled(result_tensor)

        def _validate(_):
            # NOTE: ``test_actor`` is late-bound — this closure only runs
            # inside the run_actor_test blocks below, where it is defined.
            data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
            assert_array_equal(data, mock_data + np.ones((4,)))

        # first execution
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None,
                                        _promise=True) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))
        self.get_result()

        # second execution with the same graph key must also succeed
        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None,
                                        _promise=True) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))
        self.get_result()
def testWorkerProcessRestart(self):
    """Killing a worker's calc-actor process should make its daemon restart it.

    Fix: ``proc`` is now initialized before the ``try`` so the ``finally``
    cleanup no longer raises NameError (masking the real failure) when
    startup fails before ``Popen`` runs.
    """
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    proc = None
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            pool.create_actor(ClusterInfoActor, schedulers=[mock_scheduler_addr],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_name())

            proc = subprocess.Popen([
                sys.executable, '-m', 'mars.worker',
                '-a', '127.0.0.1',
                '--schedulers', mock_scheduler_addr,
                '--cpu-procs', '1',
                '--cache-mem', '10m',
                '--spill-dir', self._spill_dir,
                '--ignore-avail-mem',
            ])
            worker_endpoint = self._wait_worker_ready(proc, resource_ref)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                        address=worker_endpoint)
            dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                          address=worker_endpoint)
            cpu_slots = dispatch_ref.get_slots('cpu')
            calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint)
            daemon_ref.kill_actor_process(calc_ref)

            # wait up to 10s for the daemon to bring the process back
            check_start = time.time()
            while not daemon_ref.is_actor_process_alive(calc_ref):
                gevent.sleep(0.1)
                if time.time() - check_start > 10:
                    raise TimeoutError('Check process restart timeout')
    finally:
        if proc is not None:
            if proc.poll() is None:
                # graceful stop first, then hard kill after a 5-second grace
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
        if os.path.exists(options.worker.plasma_socket):
            os.unlink(options.worker.plasma_socket)
def testEvents(self, *_):
    """Single and open/close event bookkeeping in EventsActor, plus pickling
    of event records and the EventContext helper."""
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent',
                           address=mock_scheduler_addr) as pool:
        events_ref = pool.create_actor(EventsActor)

        event1 = events_ref.add_single_event(
            EventCategory.RESOURCE, EventLevel.WARNING,
            ResourceEventType.MEM_HIGH, 'test_owner')
        self.assertIsNotNone(event1)

        event2 = events_ref.add_open_event(
            EventCategory.PROCEDURE, EventLevel.NORMAL,
            ProcedureEventType.CPU_CALC, 'test_owner2')
        self.assertIsNotNone(event2)

        time.sleep(1)

        # after 1s the single RESOURCE event no longer matches the query
        # window, while the still-open PROCEDURE event does
        proc_events = events_ref.query_by_time(EventCategory.RESOURCE)
        self.assertEqual(len(proc_events), 0)

        proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
        self.assertEqual(len(proc_events), 1)

        events_ref.close_event(event2)

        proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
        self.assertGreater(proc_events[0].time_end, proc_events[0].time_start)

        # repeated closing shall not cause any problems
        events_ref.close_event(event2)

        # event records must survive a pickle round trip
        reloaded = pickle.loads(pickle.dumps(proc_events[0]))
        self.assertEqual(reloaded.event_id, proc_events[0].event_id)

        with EventContext(events_ref, EventCategory.PROCEDURE, EventLevel.NORMAL,
                          ProcedureEventType.CPU_CALC, 'test_owner3'):
            # while the context is open the newest event has no end time
            proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
            self.assertIsNone(proc_events[-1].time_end)
        # NOTE(review): this checks the record fetched *before* the context
        # closed — presumably the query returns live objects whose time_end
        # is filled in on close; confirm against EventsActor's implementation.
        self.assertIsNotNone(proc_events[-1].time_end)
async def actor_pool():
    """Yield a local actor pool and the web server port started on it.

    Registers the API entry handler, the test handler and the extra-handler
    discovery module before yielding.
    """
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')
    pool = await mo.create_actor_pool(
        '127.0.0.1', n_process=0, subprocess_start_method=start_method)
    async with pool:
        web_config = {
            'host': '127.0.0.1',
            'port': get_next_port(),
            'web_handlers': {
                '/api': MarsApiEntryHandler,
                TestAPIHandler.get_root_pattern(): TestAPIHandler,
            },
            'extra_discovery_modules': [
                'mars.services.web.tests.extra_handler',
            ],
        }
        await mo.create_actor(WebActor, web_config,
                              address=pool.external_address)
        yield pool, web_config['port']
def testKVStoreActor(self):
    """Verify KVStoreActor read/write/batch operations against a live etcd.

    Fix: ``options.kv_store`` was mutated for the test and never restored,
    leaking the etcd setting into subsequent tests; it is now saved and
    restored in a try/finally.
    """
    etcd_port = get_next_port()
    proc_helper = EtcdProcessHelper(port_range_start=etcd_port)
    old_kv_store = options.kv_store
    options.kv_store = 'etcd://127.0.0.1:%s' % etcd_port
    try:
        with proc_helper.run(), create_actor_pool(n_process=1, backend='gevent') as pool:
            store_ref = pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())
            store_ref.write('/node/v1', 'value1')
            store_ref.write('/node/v2', 'value2')
            store_ref.write_batch([
                ('/node/v2', 'value2'),
                ('/node/v3', 'value3'),
            ])

            self.assertEqual(store_ref.read('/node/v1').value, 'value1')
            self.assertListEqual(
                [v.value for v in store_ref.read_batch(['/node/v2', '/node/v3'])],
                ['value2', 'value3'])
    finally:
        # undo global option mutation so other tests see the original store
        options.kv_store = old_kv_store
async def test_meta_web_api():
    """End-to-end check of the meta web API against a running supervisor."""
    pool = await mo.create_actor_pool('127.0.0.1', n_process=0)
    web_port = get_next_port()
    async with pool:
        supervisor_addr = pool.external_address
        config = {
            'services': ['cluster', 'session', 'meta', 'web'],
            'cluster': {
                'backend': 'fixed',
                'lookup_address': supervisor_addr,
            },
            'meta': {'store': 'dict'},
            'web': {'port': web_port},
        }
        await start_services(NodeRole.SUPERVISOR, config, address=supervisor_addr)

        session_id = 'test_session'
        session_api = await SessionAPI.create(supervisor_addr)
        await session_api.create_session(session_id)

        tensor = tile(mt.random.rand(10, 10))

        meta_api = await MetaAPI.create(session_id, supervisor_addr)
        web_api = WebMetaAPI(session_id, f'http://localhost:{web_port}')

        await meta_api.set_chunk_meta(tensor.chunks[0],
                                      bands=[(supervisor_addr, 'numa-0')])
        # fetch the same meta through the HTTP API, restricted to two fields
        meta = await web_api.get_chunk_meta(tensor.chunks[0].key,
                                            fields=['shape', 'bands'])
        assert set(meta.keys()) == {'shape', 'bands'}

        with pytest.raises(KeyError):
            await web_api.get_chunk_meta('non-exist-key')
def testEmptyGraph(self, *_):
    """An empty DAG should execute immediately and finish in SUCCEEDED state."""
    session_id = str(uuid.uuid4())
    addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor,
                                         uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

        # register two mock workers so assignment has targets available
        for worker_ep in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker_ep,
                                         dict(hardware=dict(cpu_total=4)))

        graph_key = str(uuid.uuid4())
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(DAG()),
            uid=GraphActor.gen_uid(session_id, graph_key))

        graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.SUCCEEDED)
def _start_shared_holder_pool(self):
    """Context generator: build a pool hosting a SharedHolderActor and its
    supporting actors, then yield ``(pool, test_actor)``."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        # (actor class, positional args); every actor uses its default uid.
        # Creation order matters: daemon/manager come before the holder.
        for actor_cls, args in [
                (WorkerClusterInfoActor, ([test_addr],)),
                (StatusActor, (test_addr,)),
                (WorkerDaemonActor, ()),
                (StorageManagerActor, ()),
                (PlasmaKeyMapActor, ()),
                (SharedHolderActor, (self.plasma_storage_size,))]:
            pool.create_actor(actor_cls, *args, uid=actor_cls.default_uid())

        yield pool, test_actor
def testStatus(self):
    """Enabling status upload should make worker metadata readable through
    ResourceActor after a short delay."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    old_spill_dir = options.worker.spill_directory
    dir_name = options.worker.spill_directory = tempfile.mkdtemp(
        prefix='temp-mars-spill-')
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [pool_address],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(WorkerClusterInfoActor, [pool_address],
                              uid=WorkerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())
            status_ref = pool.create_actor(StatusActor, pool_address,
                                           uid=StatusActor.default_uid())

            status_ref.enable_status_upload()
            status_ref.update_slots(dict(cpu=4))
            status_ref.update_stats(dict(min_est_finish_time=10))

            def delayed_fetch():
                # give the uploader time to push at least one status report
                gevent.sleep(1.5)
                return resource_ref.get_workers_meta()

            fetcher = gevent.spawn(delayed_fetch)
            fetcher.join()
            self.assertIsNotNone(fetcher.value)

            pool.destroy_actor(status_ref)
    finally:
        # restore the global spill directory and remove the temp dir
        options.worker.spill_directory = old_spill_dir
        shutil.rmtree(dir_name)
def testSharedPutAndGet(self, *_):
    """Round-trip data through the shared-memory storage handler, covering
    raw objects, pre-serialized objects and raw buffers."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)
        bytes_data2 = ser_data2.to_buffer()

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        # raw-object path: put, check location, read back, delete
        handler.put_objects(session_id, [data_key1], [data1])
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
            [(0, DataStorageDevice.SHARED_MEMORY)])
        assert_allclose(data1, handler.get_objects(session_id, [data_key1])[0])

        handler.delete(session_id, [data_key1])
        self.assertEqual(
            list(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
            [])
        with self.assertRaises(KeyError):
            handler.get_objects(session_id, [data_key1])

        # serialized-object path and raw-buffer path (both serialize=True)
        handler.put_objects(session_id, [data_key2], [ser_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])

        handler.put_objects(session_id, [data_key2], [bytes_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])
def testKVStoreActor(self):
    """Verify KVStoreActor read/write/batch/delete against a live etcd.

    Fix: ``options.kv_store`` was overwritten for the test and never
    restored, leaking the etcd setting into later tests; it is now saved
    and restored in a try/finally.
    """
    etcd_port = get_next_port()
    proc_helper = EtcdProcessHelper(port_range_start=etcd_port)
    old_kv_store = options.kv_store
    options.kv_store = f'etcd://127.0.0.1:{etcd_port}'
    try:
        with proc_helper.run(), create_actor_pool(n_process=1, backend='gevent') as pool:
            store_ref = pool.create_actor(KVStoreActor, uid=KVStoreActor.default_uid())
            store_ref.write('/node/v1', 'value1')
            store_ref.write('/node/v2', 'value2')
            store_ref.write_batch([
                ('/node/v2', 'value2'),
                ('/node/v3', 'value3'),
            ])

            self.assertEqual(store_ref.read('/node/v1').value, 'value1')
            self.assertListEqual(
                [v.value for v in store_ref.read_batch(['/node/v2', '/node/v3'])],
                ['value2', 'value3'])

            # deleting twice raises; the silent flag suppresses the error
            store_ref.delete('/node', dir=True, recursive=True)
            with self.assertRaises(KeyError):
                store_ref.delete('/node', dir=True, recursive=True)
            store_ref.delete('/node', dir=True, recursive=True, silent=True)
    finally:
        # undo global option mutation so other tests see the original store
        options.kv_store = old_kv_store
def testProcMemPutAndGet(self):
    """Round-trip data through the in-process memory storage handler,
    covering raw objects, pre-serialized objects and raw buffers."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)
        bytes_data2 = ser_data2.to_buffer()

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.PROC_MEMORY))

        # raw-object path: put, check location, read back, delete
        handler.put_object(session_id, data_key1, data1)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(session_id, data_key1)),
            [(0, DataStorageDevice.PROC_MEMORY)])
        assert_allclose(data1, handler.get_object(session_id, data_key1))

        handler.delete(session_id, data_key1)
        self.assertIsNone(
            storage_manager_ref.get_data_locations(session_id, data_key1))
        with self.assertRaises(KeyError):
            handler.get_object(session_id, data_key1)

        # serialized-object path and raw-buffer path (both serialized=True)
        handler.put_object(session_id, data_key2, ser_data2, serialized=True)
        assert_allclose(data2, handler.get_object(session_id, data_key2))
        handler.delete(session_id, data_key2)

        handler.put_object(session_id, data_key2, bytes_data2, serialized=True)
        assert_allclose(data2, handler.get_object(session_id, data_key2))
        handler.delete(session_id, data_key2)
def testStatus(self):
    """Status upload should make worker metadata appear in the KV store.

    Fix: the delayed read result was only printed, so the test asserted
    nothing; the value is now asserted non-None, matching the sibling
    status test's behavior.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address) as pool:
        pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                          uid=ClusterInfoActor.default_name())
        pool.create_actor(KVStoreActor, uid='KVStoreActor')
        pool.create_actor(ChunkHolderActor, self._plasma_helper._size,
                          uid='ChunkHolderActor')
        pool.create_actor(StatusActor, '127.0.0.1:1234', uid='StatusActor')

        def delay_read():
            # give the status uploader time to push at least one report
            gevent.sleep(2)
            return self._kv_store.read('/workers/meta', recursive=True)

        gl = gevent.spawn(delay_read)
        gl.join()
        v = gl.value
        # was ``print(v)``: the test now verifies the meta was uploaded
        self.assertIsNotNone(v)
def testSharedLoadFromObjects(self, *_):
    """Loading into shared memory from the in-proc object store should copy
    the data and leave no lingering reference to the source object."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        # load from object io
        # weakref lets us verify below that data1 is really released
        ref_data1 = weakref.ref(data1)
        proc_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.PROC_MEMORY))
        proc_handler.put_objects(session_id, [data_key1], [data1])
        del data1

        handler.load_from_object_io(session_id, [data_key1], proc_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        # after the load, the data resides in both proc and shared memory
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
            [(0, DataStorageDevice.PROC_MEMORY),
             (0, DataStorageDevice.SHARED_MEMORY)])

        proc_handler.delete(session_id, [data_key1])
        # the dead weakref proves the proc-memory copy was actually dropped
        self.assertIsNone(ref_data1())
        handler.delete(session_id, [data_key1])
async def test_session_service(test_web):
    """Create, inspect and delete sessions via the actor API or the web API,
    depending on the ``test_web`` parameter."""
    pool = await mo.create_actor_pool('127.0.0.1', n_process=0)
    async with pool:
        supervisor_addr = pool.external_address
        config = {
            'services': ['cluster', 'session', 'meta'],
            'cluster': {
                'backend': 'fixed',
                'lookup_address': supervisor_addr,
            },
            'meta': {'store': 'dict'},
        }
        if test_web:
            config['services'] += ['web']
            config['web'] = {'port': get_next_port()}

        await start_services(NodeRole.SUPERVISOR, config, address=supervisor_addr)

        if test_web:
            session_api = WebSessionAPI(
                f'http://127.0.0.1:{config["web"]["port"]}')
        else:
            session_api = await SessionAPI.create(supervisor_addr)

        session_id = 'test_session'
        session_address = await session_api.create_session(session_id)
        assert session_address == supervisor_addr
        assert await session_api.has_session(session_id) is True

        sessions = await session_api.get_sessions()
        assert sessions[0].session_id == session_id
        if not test_web:
            # address lookup is only exposed by the actor-backed API
            assert await session_api.get_session_address(session_id) == session_address

        await session_api.delete_session(session_id)
        assert await session_api.has_session(session_id) is False
        assert await session_api.get_sessions() == []
def testHolder(self):
    """Run the cache tests inside CacheTestActor and re-raise any captured
    exception in this process.

    Fix: the polling loop had no timeout and could hang the whole test
    suite forever if the cache test never reported; a 60-second budget is
    now enforced, matching the timeout style used by sibling tests.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address) as pool:
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_name())
        pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                          uid=ClusterInfoActor.default_name())
        pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(QuotaActor, 1024 * 1024 * 10,
                          uid=MemQuotaActor.default_name())
        cache_ref = pool.create_actor(ChunkHolderActor, self.plasma_storage_size,
                                      uid=ChunkHolderActor.default_name())
        pool.create_actor(SpillActor)

        try:
            test_ref = pool.create_actor(CacheTestActor)
            test_ref.run_test_cache()
            # wait (bounded) for the test actor to publish its outcome
            check_time = time.time()
            while not test_ref.get_exc_info()[0]:
                pool.sleep(0.1)
                if time.time() - check_time > 60:
                    raise SystemError('Check cache test timeout')
            exc_info = test_ref.get_exc_info()[1]
            if exc_info:
                # re-raise the exception captured inside the actor
                six.reraise(*exc_info)
        finally:
            pool.destroy_actor(cache_ref)
def testDaemon(self):
    """WorkerDaemonActor should detect killed actor processes, restart them,
    recreate its registered actors and invoke registered callbacks."""
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=2, backend='gevent',
                           distributor=MarsDistributor(2, 'w:0:'),
                           address=mock_scheduler_addr) as pool:
        daemon_ref = pool.create_actor(WorkerDaemonActor,
                                       uid=WorkerDaemonActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

        # actors created *through the daemon* are recreated after a restart
        sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                              uid='w:1:DaemonSleeperActor')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
        test_actor = pool.create_actor(DaemonTestActor)
        # test_actor asks to be notified when peer processes go down
        daemon_ref.register_actor_callback(
            test_actor, DaemonTestActor.handle_process_down_for_actors.__name__)

        test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
        self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.sleep(0.5)

        daemon_ref.kill_actor_process(sleeper_ref)
        # repeated kill shall not produce errors
        daemon_ref.kill_actor_process(sleeper_ref)
        self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.restart_process(1)
        daemon_ref.handle_process_down([1])
        pool.sleep(1)
        # after restart, the daemon has recreated the sleeper actor
        self.assertTrue(pool.has_actor(sleeper_ref))

        # the sleep interrupted by the kill surfaces as WorkerProcessStopped
        with self.assertRaises(WorkerProcessStopped):
            test_actor.get_result()

        # a fresh sleep on the recreated actor completes normally
        test_actor.run_test_sleep(sleeper_ref, 1)
        pool.sleep(1.5)

        test_actor.get_result()
def testStopGraphCalc(self):
    """Stopping execution mid-calculation should kill the calc process and
    surface ExecutionInterrupted to the caller."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=2, backend='gevent', address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address, with_status=False)

        daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())

        # mock calc takes ~10s, leaving plenty of time to interrupt it
        calc_ref = daemon_ref.create_actor(
            MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

        test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
        test_actor.run_simple_calc(session_id, _tell=True)

        pool.sleep(2)
        proc_id = pool.distributor.distribute(calc_ref.uid)
        execution_ref.stop_execution(session_id, test_actor.get_graph_key(),
                                     _tell=True)
        # stopping kills the hosting process; wait until it is actually down
        while daemon_ref.is_actor_process_alive(calc_ref):
            pool.sleep(0.1)
        pool.restart_process(proc_id)
        daemon_ref.handle_process_down([proc_id])

        with self.assertRaises(ExecutionInterrupted):
            self.wait_for_result(pool, test_actor)
def _prepare_test_graph(self, session_id, graph_key, mock_workers):
    """Build a small add-then-split tensor graph, stand up a scheduler pool
    around it, run graph preparation, and yield ``(pool, graph_ref)``."""
    addr = '127.0.0.1:%d' % get_next_port()

    rand_a = mt.random.random((100, ))
    rand_b = mt.random.random((100, ))
    total = rand_a + rand_b
    part1, part2 = mt.split(total, 2)

    graph = DAG()
    part1.build_graph(graph=graph, compose=False)
    part2.build_graph(graph=graph, compose=False)

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor,
                                         uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(graph),
            uid=GraphActor.gen_name(session_id, graph_key))

        for worker in mock_workers:
            resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

        # run the preparation pipeline without starting operand actors
        graph_ref.prepare_graph()
        graph_ref.scan_node()
        graph_ref.place_initial_chunks()
        graph_ref.create_operand_actors(_start=False)

        yield pool, graph_ref
async def test_start_service(actor_pool_context):
    """Start configured services plus web, then verify the actors respond and
    the HTTP handler is reachable; unknown services must raise ImportError."""
    pool = actor_pool_context
    web_port = get_next_port()
    config = {
        'services': [['test_svc1'], 'test_svc2', 'web'],
        'test_svc1': {'uid': 'TestActor1', 'arg1': 'val1'},
        'test_svc2': {'uid': 'TestActor2', 'arg2': 'val2', 'ref': 'TestActor1'},
        'web': {'port': web_port},
    }
    await start_services(NodeRole.SUPERVISOR, config,
                         'mars.services.tests.test_svcs',
                         address=pool.external_address)

    ref1 = await mo.actor_ref('TestActor1', address=pool.external_address)
    ref2 = await mo.actor_ref('TestActor2', address=pool.external_address)
    assert await ref1.get_arg() == 'val1'
    # svc2 resolves svc1 through its 'ref' config entry
    assert await ref2.get_arg() == 'val1:val2'

    with pytest.raises(ImportError):
        await start_services(NodeRole.SUPERVISOR,
                             {'services': ['non-exist-svc']},
                             address=pool.external_address)

    http_client = AsyncHTTPClient()
    resp = await http_client.fetch(
        f'http://127.0.0.1:{web_port}/test_actor1/test_api')
    assert resp.body.decode() == 'val1'
def testEstimateGraphFinishTime(self):
    """After seeding enough speed statistics, estimating a graph's finish
    time should populate min/max estimates in StatusActor."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)

        status_ref = pool.actor_ref(StatusActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((10, 8), chunk_size=10)
        graph = arr.build_graph(compose=False, tiled=True)
        arr = get_tiled(arr)

        graph_key = str(uuid.uuid4())

        # seed enough samples that the estimator trusts the mean speeds
        for _ in range(options.optimize.min_stats_count + 1):
            status_ref.update_mean_stats(
                'calc_speed.' + type(arr.chunks[0].op).__name__, 10)
            status_ref.update_mean_stats('disk_read_speed', 10)
            status_ref.update_mean_stats('disk_write_speed', 10)
            status_ref.update_mean_stats('net_transfer_speed', 10)

        execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                    dict(chunks=[arr.chunks[0].key]), None)
        execution_ref.estimate_graph_finish_time(session_id, graph_key)

        stats_dict = status_ref.get_stats(
            ['min_est_finish_time', 'max_est_finish_time'])
        self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
        self.assertIsNotNone(stats_dict.get('max_est_finish_time'))