async def test_server_closed():
    """Check the errors raised once a sub process is killed and the pool is left.

    The test asserts three things: an in-flight call fails with
    ``ServerClosed`` after its process is killed, ``pool.start()`` raises
    ``RuntimeError`` once the ``async with`` block has been exited, and a
    later ``has_actor`` call raises ``ConnectionError``.
    """
    start_method = None if sys.platform == 'win32' else 'fork'
    pool = await create_actor_pool(
        '127.0.0.1',
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=False,
    )
    ctx = get_context()

    async with pool:
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=ProcessIndex(1),
        )

        # Put a long-running call in flight, then yield once so the task
        # gets a chance to start before the process is killed.
        pending_call = asyncio.create_task(actor_ref.sleep(10))
        await asyncio.sleep(0)

        # Kill the first sub process tracked by the pool.
        sub_processes = list(pool._sub_processes.values())
        victim = sub_processes[0]
        victim.kill()

        # The process is gone, so the pending call must fail.
        with pytest.raises(ServerClosed):
            await pending_call

        assert not victim.is_alive()

    # Starting the pool again after leaving the context must be rejected.
    with pytest.raises(RuntimeError):
        await pool.start()

    # The server is no longer reachable.
    with pytest.raises(ConnectionError):
        await ctx.has_actor(actor_ref)
async def test_create_actor_pool():
    """Create, call, look up and destroy actors on the main pool and a sub pool."""
    pool = await create_actor_pool('127.0.0.1', n_process=2)

    async with pool:
        ctx = get_context()

        # --- actor on the main pool ---
        main_ref = await ctx.create_actor(
            TestActor,
            uid='test-1',
            address=pool.external_address,
        )
        assert await main_ref.add(3) == 3
        assert await main_ref.add(1) == 4
        assert (await ctx.has_actor(main_ref)) is True
        assert (await ctx.actor_ref(main_ref)) == main_ref

        await ctx.destroy_actor(main_ref)
        assert (await ctx.has_actor(main_ref)) is False

        # --- actor on a sub pool ---
        sub_ref = await ctx.create_actor(
            TestActor,
            uid='test-2',
            address=pool.external_address,
            allocate_strategy=RandomSubPool(),
        )
        # The sub pool actor must not share the main pool's address.
        assert sub_ref.address != main_ref.address
        assert await sub_ref.add(3) == 3
        assert await sub_ref.add(1) == 4

        # The test expects this call to surface as a RuntimeError.
        with pytest.raises(RuntimeError):
            await sub_ref.return_cannot_unpickle()

        assert (await ctx.has_actor(sub_ref)) is True
        assert (await ctx.actor_ref(sub_ref)) == sub_ref

        await ctx.destroy_actor(sub_ref)
        assert (await ctx.has_actor(sub_ref)) is False
async def test_parallel_allocate_idle_label():
    """Two concurrent ``IdleLabel`` allocations must end up in different processes.

    Two actors are created in parallel with the same ``IdleLabel`` strategy
    on a pool whose two sub processes both carry ``'my_label'``; the test
    asserts that the actors report two distinct process ids.
    """
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
        labels=[None, 'my_label', 'my_label'],
    )

    class _Actor(Actor):
        def get_pid(self):
            return os.getpid()

    # Run inside the pool context so the pool is shut down when the test
    # ends (even on assertion failure) instead of leaking its sub processes.
    async with pool:
        ctx = get_context()
        strategy = IdleLabel('my_label', 'tests')
        tasks = [
            ctx.create_actor(_Actor, allocate_strategy=strategy,
                             address=pool.external_address),
            ctx.create_actor(_Actor, allocate_strategy=strategy,
                             address=pool.external_address),
        ]
        refs = await asyncio.gather(*tasks)

        # Identical process ids would mean both actors were placed in the
        # same sub process; the set must therefore hold two entries.
        assert len({await ref.get_pid() for ref in refs}) == 2
async def test_two_pools():
    """Run two pools side by side and check addressing plus a cross-pool call."""
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None

    ctx = get_context()

    pool1 = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
    )
    pool2 = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
    )

    def is_interprocess_address(addr):
        # Non-Windows platforms are expected to use a unix socket address;
        # on Windows a loopback ``host:port`` address is matched instead.
        if not sys.platform.startswith('win'):
            return addr.startswith('unixsocket://')
        return re.match(r'127\.0\.0\.1:\d+', addr)

    def internal_address_of(ref):
        # Resolve the router on every call, exactly as the inline form did.
        return Router.get_instance().get_internal_address(ref.address)

    try:
        # Main pool of pool1.
        actor_ref1 = await ctx.create_actor(
            TestActor,
            address=pool1.external_address,
            allocate_strategy=MainPool(),
        )
        assert actor_ref1.address == pool1.external_address
        assert await actor_ref1.add(1) == 1
        assert internal_address_of(actor_ref1).startswith('dummy://')

        # Sub pool of pool1.
        actor_ref2 = await ctx.create_actor(
            TestActor,
            address=pool1.external_address,
            allocate_strategy=RandomSubPool(),
        )
        assert actor_ref2.address in pool1._config.get_external_addresses()[1:]
        assert await actor_ref2.add(3) == 3
        assert is_interprocess_address(internal_address_of(actor_ref2))

        # Main pool of pool2.
        actor_ref3 = await ctx.create_actor(
            TestActor,
            address=pool2.external_address,
            allocate_strategy=MainPool(),
        )
        assert actor_ref3.address == pool2.external_address
        assert await actor_ref3.add(5) == 5
        assert internal_address_of(actor_ref3).startswith('dummy://')

        # Sub pool of pool2.
        actor_ref4 = await ctx.create_actor(
            TestActor,
            address=pool2.external_address,
            allocate_strategy=RandomSubPool(),
        )
        assert actor_ref4.address in pool2._config.get_external_addresses()[1:]
        assert await actor_ref4.add(7) == 7
        assert is_interprocess_address(internal_address_of(actor_ref4))

        # Call from an actor in pool1 to an actor in pool2.
        assert await actor_ref2.add_other(actor_ref4, 3) == 13
    finally:
        await pool1.stop()
        await pool2.stop()
async def test_auto_recover(ray_start_regular, auto_recover):
    """Kill a sub pool actor on a Ray-hosted pool and check the recovery outcome.

    With a truthy ``auto_recover`` the sub pool must report healthy again and
    the actor must exist only for the ``'actor'`` / ``True`` settings;
    otherwise looking up the actor must raise.
    """
    pg_name, n_process = 'ray_cluster', 1
    pg = ray.util.placement_group(name=pg_name, bundles=[{'CPU': n_process}])
    assert pg.wait(timeout_seconds=20)

    address = process_placement_to_address(pg_name, 0, process_index=0)
    pool_handle = await mo.create_actor_pool(
        address, n_process=n_process, auto_recover=auto_recover)
    await pool_handle.actor_pool.remote('start')

    ctx = get_context()

    # Waiting on the main pool itself is expected to return immediately.
    await ctx.wait_actor_pool_recovered(address, address)

    # Actors living on the main pool cannot be killed.
    actor_ref = await ctx.create_actor(
        TestActor, address=address, allocate_strategy=MainPool())
    with pytest.raises(ValueError):
        await mo.kill_actor(actor_ref)

    # Killing an actor on a sub pool kills the process hosting it.
    actor_ref = await ctx.create_actor(
        TestActor, address=address, allocate_strategy=ProcessIndex(1))
    await ctx.kill_actor(actor_ref)

    if not auto_recover:
        with pytest.raises((ServerClosed, ConnectionError)):
            await ctx.has_actor(actor_ref)
    else:
        await ctx.wait_actor_pool_recovered(actor_ref.address, address)
        sub_pool_address = process_placement_to_address(
            pg_name, 0, process_index=1)
        sub_pool_handle = ray.get_actor(sub_pool_address)
        status = await sub_pool_handle.actor_pool.remote('health_check')
        assert status == PoolStatus.HEALTHY

        expect_has_actor = auto_recover in ['actor', True]
        assert await ctx.has_actor(actor_ref) is expect_has_actor

    if 'COV_CORE_SOURCE' in os.environ:
        pool_addresses = [
            process_placement_to_address(pg_name, 0, process_index=i)
            for i in range(2)
        ]
        for addr in pool_addresses:
            # must save the local reference until this is fixed:
            # https://github.com/ray-project/ray/issues/7815
            ray_actor = ray.get_actor(addr)
            ray.get(ray_actor.cleanup.remote())
async def test_create_actor_pool():
    """Exercise actor lifecycle, cancellation and global router bookkeeping."""
    pool = await create_actor_pool('127.0.0.1', n_process=2)

    async with pool:
        # The global router is a separate object from the pool's router,
        # yet it must know the pool's external address.
        global_router = Router.get_instance()
        assert global_router is not pool.router
        assert pool.external_address in global_router._curr_external_addresses
        assert pool.external_address in global_router._mapping

        ctx = get_context()

        # --- actor on the main pool ---
        main_ref = await ctx.create_actor(
            TestActor, uid='test-1', address=pool.external_address)
        assert await main_ref.add(3) == 3
        assert await main_ref.add(1) == 4
        assert (await ctx.has_actor(main_ref)) is True
        assert (await ctx.actor_ref(main_ref)) == main_ref

        # Cancel a running call; the test expects the awaited task to
        # yield 5.
        sleeper = asyncio.create_task(main_ref.sleep(20))
        await asyncio.sleep(0)
        sleeper.cancel()
        assert await sleeper == 5

        await ctx.destroy_actor(main_ref)
        assert (await ctx.has_actor(main_ref)) is False

        # --- actor on a sub pool ---
        sub_ref = await ctx.create_actor(
            TestActor,
            uid='test-2',
            address=pool.external_address,
            allocate_strategy=RandomSubPool(),
        )
        assert sub_ref.address != main_ref.address
        assert await sub_ref.add(3) == 3
        assert await sub_ref.add(1) == 4

        with pytest.raises(RuntimeError):
            await sub_ref.return_cannot_unpickle()

        assert (await ctx.has_actor(sub_ref)) is True
        assert (await ctx.actor_ref(sub_ref)) == sub_ref

        # Cancel again on the sub pool; this time the call must also
        # finish in under 3 seconds.
        sleeper = asyncio.create_task(sub_ref.sleep(20))
        start = time.time()
        await asyncio.sleep(0)
        sleeper.cancel()
        assert await sleeper == 5
        assert time.time() - start < 3

        await ctx.destroy_actor(sub_ref)
        assert (await ctx.has_actor(sub_ref)) is False

    # Once the pool has shut down, the global router must be empty.
    global_router = Router.get_instance()
    assert len(global_router._curr_external_addresses) == 0
    assert len(global_router._mapping) == 0
async def test_auto_recover(auto_recover):
    """Kill a sub pool actor and check the outcome for each ``auto_recover`` mode.

    With a truthy ``auto_recover`` the recover callback must have fired and
    the actor must exist only for the ``'actor'`` / ``True`` settings;
    otherwise looking up the actor must raise.
    """
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None

    recovered = asyncio.Event()

    def on_process_recover(*_):
        # Record that the pool invoked the recover callback.
        recovered.set()

    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=auto_recover,
        on_process_recover=on_process_recover,
    )

    async with pool:
        ctx = get_context()

        # Waiting on the main pool itself is expected to return immediately.
        await ctx.wait_actor_pool_recovered(
            pool.external_address, pool.external_address)

        # Actors living on the main pool cannot be killed.
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=MainPool(),
        )
        with pytest.raises(ValueError):
            await kill_actor(actor_ref)

        # Killing an actor on a sub pool kills the process hosting it.
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=ProcessIndex(1),
        )
        await ctx.kill_actor(actor_ref)

        if not auto_recover:
            with pytest.raises((ServerClosed, ConnectionError)):
                await ctx.has_actor(actor_ref)
        else:
            # The process must have been killed; wait for it to come back.
            await ctx.wait_actor_pool_recovered(
                actor_ref.address, pool.external_address)
            assert recovered.is_set()

            expect_has_actor = auto_recover in ['actor', True]
            assert await ctx.has_actor(actor_ref) is expect_has_actor
async def test_two_pools():
    """Run two pools side by side and check addressing plus a cross-pool call."""
    start_method = None if sys.platform == 'win32' else 'fork'

    ctx = get_context()

    pool1 = await create_actor_pool(
        '127.0.0.1', n_process=2, subprocess_start_method=start_method)
    pool2 = await create_actor_pool(
        '127.0.0.1', n_process=2, subprocess_start_method=start_method)

    def internal_address_of(ref):
        # Resolve the router on every call, exactly as the inline form did.
        return Router.get_instance().get_internal_address(ref.address)

    try:
        # Main pool of pool1: the test expects a 'dummy://' internal address.
        actor_ref1 = await ctx.create_actor(
            TestActor,
            address=pool1.external_address,
            allocate_strategy=MainPool(),
        )
        assert actor_ref1.address == pool1.external_address
        assert await actor_ref1.add(1) == 1
        assert internal_address_of(actor_ref1).startswith('dummy://')

        # Sub pool of pool1: the test expects a 'unixsocket://' address.
        actor_ref2 = await ctx.create_actor(
            TestActor,
            address=pool1.external_address,
            allocate_strategy=RandomSubPool(),
        )
        assert actor_ref2.address in pool1._config.get_external_addresses()[1:]
        assert await actor_ref2.add(3) == 3
        assert internal_address_of(actor_ref2).startswith('unixsocket://')

        # Main pool of pool2.
        actor_ref3 = await ctx.create_actor(
            TestActor,
            address=pool2.external_address,
            allocate_strategy=MainPool(),
        )
        assert actor_ref3.address == pool2.external_address
        assert await actor_ref3.add(5) == 5
        assert internal_address_of(actor_ref3).startswith('dummy://')

        # Sub pool of pool2.
        actor_ref4 = await ctx.create_actor(
            TestActor,
            address=pool2.external_address,
            allocate_strategy=RandomSubPool(),
        )
        assert actor_ref4.address in pool2._config.get_external_addresses()[1:]
        assert await actor_ref4.add(7) == 7
        assert internal_address_of(actor_ref4).startswith('unixsocket://')

        # Call from an actor in pool1 to an actor in pool2.
        assert await actor_ref2.add_other(actor_ref4, 3) == 13
    finally:
        await pool1.stop()
        await pool2.stop()
async def test_auto_recover(auto_recover):
    """Kill a sub pool actor and check the outcome for each ``auto_recover`` mode.

    With a truthy ``auto_recover`` the test waits for the recover callback
    and expects the actor to exist only for the ``'actor'`` / ``True``
    settings; otherwise ``has_actor`` must raise ``ServerClosed``.
    """
    start_method = None if sys.platform == 'win32' else 'fork'

    recovered = asyncio.Event()

    def on_process_recover(*_):
        # Record that the pool invoked the recover callback.
        recovered.set()

    pool = await create_actor_pool(
        '127.0.0.1',
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=auto_recover,
        on_process_recover=on_process_recover,
    )

    async with pool:
        ctx = get_context()

        # Actors living on the main pool cannot be killed.
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=MainPool(),
        )
        with pytest.raises(ValueError):
            await kill_actor(actor_ref)

        # Killing an actor on a sub pool kills the process hosting it.
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=ProcessIndex(1),
        )
        await ctx.kill_actor(actor_ref)

        if not auto_recover:
            with pytest.raises(ServerClosed):
                await ctx.has_actor(actor_ref)
        else:
            # The process must have been killed; block until the callback
            # reports the recovery.
            await recovered.wait()

            expect_has_actor = auto_recover in ['actor', True]
            assert await ctx.has_actor(actor_ref) is expect_has_actor
async def test_logging_config(logging_conf):
    """An actor in a labelled sub process must report a DEBUG effective log level."""
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=1,
        subprocess_start_method=start_method,
        labels=[None, 'my_label'],
        logging_conf=logging_conf,
    )

    class _Actor(Actor):
        def get_logger_level(self):
            # Effective level of this module's logger, read from wherever
            # the actor is running.
            return logging.getLogger(__name__).getEffectiveLevel()

    async with pool:
        ctx = get_context()
        label_strategy = IdleLabel('my_label', 'tests')
        actor = await ctx.create_actor(
            _Actor,
            allocate_strategy=label_strategy,
            address=pool.external_address,
        )
        assert await actor.get_logger_level() == logging.DEBUG
async def test_server_closed(ray_start_regular):
    """``ServerClosed`` must surface for driver-to-actor and actor-to-actor calls."""
    pg_name, n_process = 'ray_cluster', 1
    pg = ray.util.placement_group(name=pg_name, bundles=[{'CPU': n_process}])
    ray.get(pg.ready())
    address = process_placement_to_address(pg_name, 0, process_index=0)

    # Create and start the actor pool.
    pool_handle = await mo.create_actor_pool(address, n_process=n_process)
    await pool_handle.actor_pool.remote('start')

    ctx = get_context()
    actor_main = await ctx.create_actor(
        TestActor,
        address=address,
        uid='Test-main',
        allocate_strategy=ProcessIndex(0),
    )
    actor_sub = await ctx.create_actor(
        TestActor,
        address=address,
        uid='Test-sub',
        allocate_strategy=ProcessIndex(1),
    )

    # Driver -> actor: crashing the sub pool actor must raise ServerClosed.
    crash_call = asyncio.create_task(actor_sub.crash())
    with pytest.raises(ServerClosed):
        await crash_call

    # Wait until the sub pool has recovered.
    await ctx.wait_actor_pool_recovered(actor_sub.address, address)

    # Actor -> actor: the main actor's kill call against the sub pool actor
    # must raise ServerClosed as well.
    kill_call = asyncio.create_task(
        actor_main.kill(actor_sub.address, 'Test-sub'))
    with pytest.raises(ServerClosed):
        await kill_call
async def test_create_actor_pool():
    """Exercise actor lifecycle, lookup, cancellation and router bookkeeping."""
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None
    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
    )

    async with pool:
        # The global router is a separate object from the pool's router,
        # yet it must know the pool's external address.
        global_router = Router.get_instance()
        assert global_router is not pool.router
        assert pool.external_address in global_router._curr_external_addresses
        assert pool.external_address in global_router._mapping

        ctx = get_context()

        # --- actor on the main pool ---
        actor_ref = await ctx.create_actor(
            TestActor, uid='test-1', address=pool.external_address)
        assert await actor_ref.add(3) == 3
        assert await actor_ref.add(1) == 4
        assert (await ctx.has_actor(actor_ref)) is True
        assert (await ctx.actor_ref(actor_ref)) == actor_ref

        # Cancel a running call; the test expects the awaited task to
        # yield 5.
        sleeper = asyncio.create_task(actor_ref.sleep(20))
        await asyncio.sleep(0)
        sleeper.cancel()
        assert await sleeper == 5

        await ctx.destroy_actor(actor_ref)
        assert (await ctx.has_actor(actor_ref)) is False

        # Every operation on the destroyed actor must report ActorNotExist.
        for operation in (actor_ref.add, ctx.actor_ref, ctx.destroy_actor):
            with pytest.raises(ActorNotExist):
                await operation(actor_ref)

        # --- actor on a sub pool ---
        actor_ref1 = await ctx.create_actor(
            TestActor, uid='test-main', address=pool.external_address)
        actor_ref2 = await ctx.create_actor(
            TestActor,
            uid='test-2',
            address=pool.external_address,
            allocate_strategy=RandomSubPool(),
        )
        looked_up = await ctx.actor_ref(uid='test-2', address=actor_ref2.address)
        assert looked_up == actor_ref2

        # Looking up the main pool actor through the sub pool's address must
        # resolve to the main pool's external address.
        main_ref = await ctx.actor_ref(uid='test-main', address=actor_ref2.address)
        assert main_ref.address == pool.external_address
        main_ref = await ctx.actor_ref(actor_ref1)
        assert main_ref.address == pool.external_address

        assert actor_ref2.address != actor_ref.address
        assert await actor_ref2.add(3) == 3
        assert await actor_ref2.add(1) == 4

        with pytest.raises(RuntimeError):
            await actor_ref2.return_cannot_unpickle()

        assert (await ctx.has_actor(actor_ref2)) is True
        assert (await ctx.actor_ref(actor_ref2)) == actor_ref2

        # Cancel again on the sub pool; this time the call must also
        # finish in under 3 seconds.
        sleeper = asyncio.create_task(actor_ref2.sleep(20))
        start = time.time()
        await asyncio.sleep(0)
        sleeper.cancel()
        assert await sleeper == 5
        assert time.time() - start < 3

        await ctx.destroy_actor(actor_ref2)
        assert (await ctx.has_actor(actor_ref2)) is False

    # Once the pool has shut down, the global router must be empty.
    global_router = Router.get_instance()
    assert len(global_router._curr_external_addresses) == 0
    assert len(global_router._mapping) == 0