async def test_two_pools():
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    ctx = get_context()
    pool1 = await create_actor_pool('127.0.0.1',
                                    pool_cls=MainActorPool,
                                    n_process=2,
                                    subprocess_start_method=start_method)
    pool2 = await create_actor_pool('127.0.0.1',
                                    pool_cls=MainActorPool,
                                    n_process=2,
                                    subprocess_start_method=start_method)

    def is_interprocess_address(addr):
        if sys.platform.startswith('win'):
            return re.match(r'127\.0\.0\.1:\d+', addr)
        else:
            return addr.startswith('unixsocket://')

    try:
        actor_ref1 = await ctx.create_actor(TestActor,
                                            address=pool1.external_address,
                                            allocate_strategy=MainPool())
        assert actor_ref1.address == pool1.external_address
        assert await actor_ref1.add(1) == 1
        assert Router.get_instance().get_internal_address(
            actor_ref1.address).startswith('dummy://')

        actor_ref2 = await ctx.create_actor(TestActor,
                                            address=pool1.external_address,
                                            allocate_strategy=RandomSubPool())
        assert actor_ref2.address in pool1._config.get_external_addresses()[1:]
        assert await actor_ref2.add(3) == 3
        assert is_interprocess_address(
            Router.get_instance().get_internal_address(actor_ref2.address))

        actor_ref3 = await ctx.create_actor(TestActor,
                                            address=pool2.external_address,
                                            allocate_strategy=MainPool())
        assert actor_ref3.address == pool2.external_address
        assert await actor_ref3.add(5) == 5
        assert Router.get_instance().get_internal_address(
            actor_ref3.address).startswith('dummy://')

        actor_ref4 = await ctx.create_actor(TestActor,
                                            address=pool2.external_address,
                                            allocate_strategy=RandomSubPool())
        assert actor_ref4.address in pool2._config.get_external_addresses()[1:]
        assert await actor_ref4.add(7) == 7
        assert is_interprocess_address(
            Router.get_instance().get_internal_address(actor_ref4.address))

        assert await actor_ref2.add_other(actor_ref4, 3) == 13
    finally:
        await pool1.stop()
        await pool2.stop()
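# The TestActor exercised by these tests is not part of this excerpt. The class
# below is a minimal sketch reconstructed from the assertions above (add
# accumulates a counter, add_other pulls another ref's result, sleep is
# cancellable); the base class and method bodies are assumptions, not the
# source's actual definition.
class TestActor(mo.Actor):
    def __init__(self):
        self.value = 0

    def add(self, val):
        # accumulate and return the running total, so add(1) == 1, add(3) == 3
        self.value += val
        return self.value

    async def add_other(self, ref, val):
        # add the other actor's updated total to our own: 3 + (7 + 3) == 13
        self.value += await ref.add(val)
        return self.value

    async def sleep(self, second):
        # long-running call used by the cancel test; returning value + 1 on
        # cancellation is consistent with the `result.result == 7` assertion
        try:
            await asyncio.sleep(second)
            return self.value
        except asyncio.CancelledError:
            return self.value + 1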
async def actor_pool():
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    pool = await mo.create_actor_pool('127.0.0.1', n_process=3,
                                      labels=['main'] + ['numa-0'] * 2 + ['io'],
                                      subprocess_start_method=start_method)
    async with pool:
        session_id = 'test_session'
        # create mock APIs
        await MockClusterAPI.create(pool.external_address,
                                    band_to_slots={'numa-0': 2})
        await MockSessionAPI.create(pool.external_address,
                                    session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, pool.external_address)
        lifecycle_api = await MockLifecycleAPI.create(session_id,
                                                      pool.external_address)
        storage_api = await MockStorageAPI.create(session_id,
                                                  pool.external_address)
        await MockSchedulingAPI.create(session_id, pool.external_address)
        await MockSubtaskAPI.create(pool.external_address)

        # create configuration
        await mo.create_actor(TaskConfigurationActor, dict(),
                              uid=TaskConfigurationActor.default_uid(),
                              address=pool.external_address)

        # create task manager
        manager = await mo.create_actor(TaskManagerActor, session_id,
                                        uid=TaskManagerActor.gen_uid(session_id),
                                        address=pool.external_address,
                                        allocate_strategy=MainPool())

        yield pool, session_id, meta_api, lifecycle_api, storage_api, manager

        await MockStorageAPI.cleanup(pool.external_address)
async def test_auto_recover(ray_start_regular, auto_recover):
    pg_name, n_process = 'ray_cluster', 1
    pg = ray.util.placement_group(name=pg_name, bundles=[{'CPU': n_process}])
    assert pg.wait(timeout_seconds=20)
    address = process_placement_to_address(pg_name, 0, process_index=0)
    actor_handle = await mo.create_actor_pool(address, n_process=n_process,
                                              auto_recover=auto_recover)
    await actor_handle.actor_pool.remote('start')

    ctx = get_context()

    # waiting for the main pool to recover always returns immediately
    await ctx.wait_actor_pool_recovered(address, address)

    # create actor on main pool
    actor_ref = await ctx.create_actor(TestActor, address=address,
                                       allocate_strategy=MainPool())

    with pytest.raises(ValueError):
        # cannot kill actors on main pool
        await mo.kill_actor(actor_ref)

    # create actor on a sub pool
    actor_ref = await ctx.create_actor(TestActor, address=address,
                                       allocate_strategy=ProcessIndex(1))
    # kill_actor will kill the corresponding process
    await ctx.kill_actor(actor_ref)

    if auto_recover:
        await ctx.wait_actor_pool_recovered(actor_ref.address, address)
        sub_pool_address = process_placement_to_address(pg_name, 0,
                                                        process_index=1)
        sub_pool_handle = ray.get_actor(sub_pool_address)
        assert await sub_pool_handle.actor_pool.remote(
            'health_check') == PoolStatus.HEALTHY

        expect_has_actor = True if auto_recover in ['actor', True] else False
        assert await ctx.has_actor(actor_ref) is expect_has_actor
    else:
        with pytest.raises((ServerClosed, ConnectionError)):
            await ctx.has_actor(actor_ref)

    if 'COV_CORE_SOURCE' in os.environ:
        for addr in [process_placement_to_address(pg_name, 0, process_index=i)
                     for i in range(2)]:
            # must save the local reference until this is fixed:
            # https://github.com/ray-project/ray/issues/7815
            ray_actor = ray.get_actor(addr)
            ray.get(ray_actor.cleanup.remote())
async def test_auto_recover(auto_recover):
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    recovered = asyncio.Event()

    def on_process_recover(*_):
        recovered.set()

    pool = await create_actor_pool('127.0.0.1',
                                   pool_cls=MainActorPool,
                                   n_process=2,
                                   subprocess_start_method=start_method,
                                   auto_recover=auto_recover,
                                   on_process_recover=on_process_recover)
    async with pool:
        ctx = get_context()

        # waiting for the main pool to recover always returns immediately
        await ctx.wait_actor_pool_recovered(pool.external_address,
                                            pool.external_address)

        # create actor on main pool
        actor_ref = await ctx.create_actor(TestActor,
                                           address=pool.external_address,
                                           allocate_strategy=MainPool())

        with pytest.raises(ValueError):
            # cannot kill actors on main pool
            await kill_actor(actor_ref)

        # create actor on a sub pool
        actor_ref = await ctx.create_actor(TestActor,
                                           address=pool.external_address,
                                           allocate_strategy=ProcessIndex(1))
        # kill_actor will kill the corresponding process
        await ctx.kill_actor(actor_ref)

        if auto_recover:
            # the killed process must have been recovered
            await ctx.wait_actor_pool_recovered(actor_ref.address,
                                                pool.external_address)
            assert recovered.is_set()

            expect_has_actor = True if auto_recover in ['actor', True] else False
            assert await ctx.has_actor(actor_ref) is expect_has_actor
        else:
            with pytest.raises((ServerClosed, ConnectionError)):
                await ctx.has_actor(actor_ref)
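# Both test_auto_recover variants above take an `auto_recover` argument whose
# values are only implied by the branches (False leads to ServerClosed,
# 'actor' and True recreate the actor, 'process' only restarts the process).
# A parametrization consistent with those branches, given here as an
# assumption rather than the source's own decorator, would be:
#
#     @pytest.mark.parametrize('auto_recover', [False, True, 'actor', 'process'])
#     @pytest.mark.asyncio
#     async def test_auto_recover(auto_recover):
#         ...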
async def actor_pool():
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    pool = await mo.create_actor_pool('127.0.0.1', n_process=2,
                                      labels=[None] + ['numa-0'] * 2,
                                      subprocess_start_method=start_method)
    async with pool:
        session_id = 'test_session'
        # create mock APIs
        await MockClusterAPI.create(pool.external_address)
        await MockSessionAPI.create(pool.external_address,
                                    session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, pool.external_address)
        storage_api = await MockStorageApi.create(session_id,
                                                  pool.external_address)

        # create task manager
        manager = await mo.create_actor(TaskManagerActor, session_id,
                                        uid=TaskManagerActor.gen_uid(session_id),
                                        address=pool.external_address,
                                        allocate_strategy=MainPool())

        # create band subtask manager
        await mo.create_actor(BandSubtaskManagerActor, pool.external_address, 2,
                              uid=BandSubtaskManagerActor.gen_uid('numa-0'),
                              address=pool.external_address)

        yield pool, session_id, meta_api, storage_api, manager

        await MockStorageApi.cleanup(pool.external_address)
async def test_main_actor_pool():
    config = ActorPoolConfig()
    my_label = 'computation'
    main_address = f'127.0.0.1:{get_next_port()}'
    _add_pool_conf(config, 0, 'main', 'unixsocket:///0', main_address)
    _add_pool_conf(config, 1, my_label, 'unixsocket:///1',
                   f'127.0.0.1:{get_next_port()}', env={'my_env': '1'})
    _add_pool_conf(config, 2, my_label, 'unixsocket:///2',
                   f'127.0.0.1:{get_next_port()}')

    strategy = IdleLabel(my_label, 'my_test')

    async with await MainActorPool.create(
            {'actor_pool_config': config}) as pool:
        create_actor_message = CreateActorMessage(new_message_id(), TestActor,
                                                  b'test', tuple(), dict(),
                                                  MainPool())
        message = await pool.create_actor(create_actor_message)
        actor_ref = message.result
        assert actor_ref.address == main_address

        create_actor_message1 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test1', tuple(), dict(),
                                                   strategy)
        message1 = await pool.create_actor(create_actor_message1)
        actor_ref1 = message1.result
        assert actor_ref1.address in config.get_external_addresses(my_label)

        create_actor_message2 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test2', tuple(), dict(),
                                                   strategy)
        message2 = await pool.create_actor(create_actor_message2)
        actor_ref2 = message2.result
        assert actor_ref2.address in config.get_external_addresses(my_label)
        assert actor_ref2.address != actor_ref1.address

        create_actor_message3 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test3', tuple(), dict(),
                                                   strategy)
        message3 = await pool.create_actor(create_actor_message3)
        # no idle slot left to allocate for the same label
        assert isinstance(message3.error, NoIdleSlot)

        has_actor_message = HasActorMessage(
            new_message_id(), create_actor_ref(main_address, b'test2'))
        assert (await pool.has_actor(has_actor_message)).result is True

        actor_ref_message = ActorRefMessage(
            new_message_id(), create_actor_ref(main_address, b'test2'))
        assert (await pool.actor_ref(actor_ref_message)).result == actor_ref2

        # tell
        tell_message = TellMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (2,), dict()))
        message = await pool.tell(tell_message)
        assert message.result is None

        # send
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (4,), dict()))
        assert (await pool.send(send_message)).result == 6

        # test error message: type mismatch
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('add', 0, ('3',), dict()))
        result = await pool.send(send_message)
        assert isinstance(result.error, TypeError)

        # send and tell to main process
        tell_message = TellMessage(new_message_id(), actor_ref,
                                   ('add', 0, (2,), dict()))
        message = await pool.tell(tell_message)
        assert message.result is None
        send_message = SendMessage(new_message_id(), actor_ref,
                                   ('add', 0, (4,), dict()))
        assert (await pool.send(send_message)).result == 6

        # send and cancel
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('sleep', 0, (20,), dict()))
        result_task = asyncio.create_task(pool.send(send_message))
        start = time.time()
        cancel_message = CancelMessage(new_message_id(), actor_ref1.address,
                                       send_message.message_id)
        cancel_task = asyncio.create_task(pool.cancel(cancel_message))
        result = await asyncio.wait_for(cancel_task, 3)
        assert result.message_type == MessageType.result
        assert result.result is True
        result = await result_task
        assert time.time() - start < 3
        assert result.message_type == MessageType.result
        assert result.result == 7

        # destroy
        destroy_actor_message = DestroyActorMessage(new_message_id(),
                                                    actor_ref1)
        message = await pool.destroy_actor(destroy_actor_message)
        assert message.result == actor_ref1.uid

        # the actor is destroyed, so telling it again returns an error message
        tell_message = TellMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (2,), dict()))
        message = await pool.tell(tell_message)
        assert isinstance(message, ErrorMessage)

        # destroy via connecting to sub pool directly
        async with await pool.router.get_client(
                config.get_external_addresses()[-1]) as client:
            destroy_actor_message = DestroyActorMessage(
                new_message_id(), actor_ref2)
            await client.send(destroy_actor_message)
            result = await client.recv()
            assert result.result == actor_ref2.uid

        # test sync config
        config.add_pool_conf(3, 'sub', 'unixsocket:///3',
                             f'127.0.0.1:{get_next_port()}')
        sync_config_message = ControlMessage(new_message_id(),
                                             pool.external_address,
                                             ControlMessageType.sync_config,
                                             config)
        message = await pool.handle_control_command(sync_config_message)
        assert message.result is True

        # test get config
        get_config_message = ControlMessage(new_message_id(),
                                            config.get_external_addresses()[1],
                                            ControlMessageType.get_config,
                                            None)
        message = await pool.handle_control_command(get_config_message)
        config2 = message.result
        assert config.as_dict() == config2.as_dict()

    assert pool.stopped
def test_main_pool():
    strategy = MainPool()
    assert strategy.get_allocated_address(config, dict()) == '127.0.0.1:1111'
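# The module-level `config` used by test_main_pool is not shown in this
# excerpt. A minimal setup consistent with the assertion above (MainPool
# allocates the process-0 external address) might look like the following;
# the labels, internal addresses, and extra entries are assumptions.
config = ActorPoolConfig()
config.add_pool_conf(0, 'main', 'unixsocket:///0', '127.0.0.1:1111')
config.add_pool_conf(1, 'test', 'unixsocket:///1', '127.0.0.1:1112')
config.add_pool_conf(2, 'test', 'unixsocket:///2', '127.0.0.1:1113')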