Ejemplo n.º 1
0
async def test_two_pools():
    """Exercise actor creation and cross-pool calls on two local actor pools.

    Two ``MainActorPool`` instances (``n_process=2`` each) are created on
    ``127.0.0.1``.  For each pool one actor is created with ``MainPool()`` and
    one with ``RandomSubPool()``; the test checks the address each actor
    reports and the internal address the router maps it to, and finally that
    the sub-pool actor of the first pool can call the sub-pool actor of the
    second pool through ``add_other``.

    Each pool is stopped in its own ``finally`` clause, so a failure while
    creating the second pool, or while stopping one of the pools, can not
    leave the other pool running.
    """
    # No explicit start method is passed on Windows.
    start_method = os.environ.get('POOL_START_METHOD', 'forkserver') \
        if sys.platform != 'win32' else None

    ctx = get_context()

    def is_interprocess_address(addr):
        # Sub pools are expected to be reached through a local TCP address on
        # Windows and through a unix socket everywhere else.
        if sys.platform.startswith('win'):
            return re.match(r'127\.0\.0\.1:\d+', addr)
        else:
            return addr.startswith('unixsocket://')

    pool1 = await create_actor_pool('127.0.0.1',
                                    pool_cls=MainActorPool,
                                    n_process=2,
                                    subprocess_start_method=start_method)
    try:
        pool2 = await create_actor_pool('127.0.0.1',
                                        pool_cls=MainActorPool,
                                        n_process=2,
                                        subprocess_start_method=start_method)
        try:
            # actor on the main process of pool1
            actor_ref1 = await ctx.create_actor(
                TestActor,
                address=pool1.external_address,
                allocate_strategy=MainPool())
            assert actor_ref1.address == pool1.external_address
            assert await actor_ref1.add(1) == 1
            assert Router.get_instance().get_internal_address(
                actor_ref1.address).startswith('dummy://')

            # actor on one of the sub pools of pool1
            actor_ref2 = await ctx.create_actor(
                TestActor,
                address=pool1.external_address,
                allocate_strategy=RandomSubPool())
            assert actor_ref2.address in \
                pool1._config.get_external_addresses()[1:]
            assert await actor_ref2.add(3) == 3
            assert is_interprocess_address(
                Router.get_instance().get_internal_address(
                    actor_ref2.address))

            # actor on the main process of pool2
            actor_ref3 = await ctx.create_actor(
                TestActor,
                address=pool2.external_address,
                allocate_strategy=MainPool())
            assert actor_ref3.address == pool2.external_address
            assert await actor_ref3.add(5) == 5
            assert Router.get_instance().get_internal_address(
                actor_ref3.address).startswith('dummy://')

            # actor on one of the sub pools of pool2
            actor_ref4 = await ctx.create_actor(
                TestActor,
                address=pool2.external_address,
                allocate_strategy=RandomSubPool())
            assert actor_ref4.address in \
                pool2._config.get_external_addresses()[1:]
            assert await actor_ref4.add(7) == 7
            assert is_interprocess_address(
                Router.get_instance().get_internal_address(
                    actor_ref4.address))

            # call from a sub pool of pool1 into a sub pool of pool2
            assert await actor_ref2.add_other(actor_ref4, 3) == 13
        finally:
            await pool2.stop()
    finally:
        await pool1.stop()
Ejemplo n.º 2
0
async def actor_pool():
    """Async generator that prepares a pool with mocked services for tests.

    Creates a local actor pool (``n_process=3``, labels ``main``, two
    ``numa-0`` and ``io``), registers the mock cluster, session, meta,
    lifecycle, storage, scheduling and subtask APIs on it, then creates the
    task configuration actor and the task manager actor.

    Yields:
        tuple: ``(pool, session_id, meta_api, lifecycle_api, storage_api,
        manager)``.

    After the consumer resumes the generator, ``MockStorageAPI.cleanup`` is
    awaited while the pool context is still open.
    """
    if sys.platform == 'win32':
        # no explicit start method is passed on Windows
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    pool_labels = ['main'] + ['numa-0'] * 2 + ['io']
    pool = await mo.create_actor_pool(
        '127.0.0.1',
        n_process=3,
        labels=pool_labels,
        subprocess_start_method=start_method)

    async with pool:
        session_id = 'test_session'
        address = pool.external_address

        # mock service APIs; only some of the handles are handed to the test
        await MockClusterAPI.create(address, band_to_slots={'numa-0': 2})
        await MockSessionAPI.create(address, session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, address)
        lifecycle_api = await MockLifecycleAPI.create(session_id, address)
        storage_api = await MockStorageAPI.create(session_id, address)
        await MockSchedulingAPI.create(session_id, address)
        await MockSubtaskAPI.create(address)

        # task configuration actor, created with an empty configuration
        await mo.create_actor(
            TaskConfigurationActor, {},
            uid=TaskConfigurationActor.default_uid(),
            address=address)

        # task manager, created with the MainPool allocation strategy
        manager_uid = TaskManagerActor.gen_uid(session_id)
        manager = await mo.create_actor(
            TaskManagerActor, session_id,
            uid=manager_uid,
            address=address,
            allocate_strategy=MainPool())

        yield pool, session_id, meta_api, lifecycle_api, storage_api, manager

        await MockStorageAPI.cleanup(address)
Ejemplo n.º 3
0
async def test_auto_recover(ray_start_regular, auto_recover):
    """Check sub-pool recovery after a kill for a Ray-backed actor pool.

    Args:
        ray_start_regular: not used in the body; presumably a fixture that
            starts Ray before the test runs.
        auto_recover: forwarded to ``create_actor_pool``.  A falsy value means
            no recovery is expected; ``'actor'`` or ``True`` mean the killed
            actor is expected to exist again after recovery; any other truthy
            value means the pool recovers without the actor.
    """
    pg_name = 'ray_cluster'
    n_process = 1
    pg = ray.util.placement_group(name=pg_name, bundles=[{'CPU': n_process}])
    assert pg.wait(timeout_seconds=20)

    main_address = process_placement_to_address(pg_name, 0, process_index=0)
    pool_handle = await mo.create_actor_pool(
        main_address, n_process=n_process, auto_recover=auto_recover)
    await pool_handle.actor_pool.remote('start')

    ctx = get_context()

    # waiting for recovery of the main pool always returns immediately
    await ctx.wait_actor_pool_recovered(main_address, main_address)

    # an actor placed on the main pool ...
    main_actor_ref = await ctx.create_actor(
        TestActor, address=main_address, allocate_strategy=MainPool())

    # ... cannot be killed
    with pytest.raises(ValueError):
        await mo.kill_actor(main_actor_ref)

    # an actor placed on process index 1
    actor_ref = await ctx.create_actor(
        TestActor, address=main_address, allocate_strategy=ProcessIndex(1))
    # kill_actor also kills the corresponding process
    await ctx.kill_actor(actor_ref)

    if not auto_recover:
        with pytest.raises((ServerClosed, ConnectionError)):
            await ctx.has_actor(actor_ref)
    else:
        await ctx.wait_actor_pool_recovered(actor_ref.address, main_address)
        sub_pool_address = process_placement_to_address(
            pg_name, 0, process_index=1)
        sub_pool_handle = ray.get_actor(sub_pool_address)
        status = await sub_pool_handle.actor_pool.remote('health_check')
        assert status == PoolStatus.HEALTHY

        expect_has_actor = auto_recover in ['actor', True]
        assert await ctx.has_actor(actor_ref) is expect_has_actor

    if 'COV_CORE_SOURCE' in os.environ:
        cleanup_addresses = [
            process_placement_to_address(pg_name, 0, process_index=index)
            for index in range(2)
        ]
        for pool_address in cleanup_addresses:
            # must save the local reference until this is fixed:
            # https://github.com/ray-project/ray/issues/7815
            ray_actor = ray.get_actor(pool_address)
            ray.get(ray_actor.cleanup.remote())
Ejemplo n.º 4
0
async def test_auto_recover(auto_recover):
    """Check sub-pool recovery after a kill for a local ``MainActorPool``.

    Args:
        auto_recover: forwarded to ``create_actor_pool``.  A falsy value means
            no recovery is expected; ``'actor'`` or ``True`` mean the killed
            actor is expected to exist again after recovery; any other truthy
            value means the pool recovers without the actor.
    """
    if sys.platform == 'win32':
        # no explicit start method is passed on Windows
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    recovered = asyncio.Event()

    def _mark_recovered(*_):
        # callback handed to the pool as ``on_process_recover``
        recovered.set()

    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=auto_recover,
        on_process_recover=_mark_recovered)

    async with pool:
        ctx = get_context()

        # waiting for recovery of the main pool always returns immediately
        await ctx.wait_actor_pool_recovered(
            pool.external_address, pool.external_address)

        # an actor placed on the main pool ...
        main_actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=MainPool())

        # ... cannot be killed
        with pytest.raises(ValueError):
            await kill_actor(main_actor_ref)

        # an actor placed on process index 1
        actor_ref = await ctx.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=ProcessIndex(1))
        # kill_actor also kills the corresponding process
        await ctx.kill_actor(actor_ref)

        if not auto_recover:
            with pytest.raises((ServerClosed, ConnectionError)):
                await ctx.has_actor(actor_ref)
        else:
            # the process must have been killed and brought back
            await ctx.wait_actor_pool_recovered(
                actor_ref.address, pool.external_address)
            assert recovered.is_set()

            expect_has_actor = auto_recover in ['actor', True]
            assert await ctx.has_actor(actor_ref) is expect_has_actor
Ejemplo n.º 5
0
async def actor_pool():
    """Async generator that prepares a pool with mocked services for tests.

    Creates a local actor pool (``n_process=2``, labels ``None`` followed by
    two ``numa-0``), registers the mock cluster, session, meta and storage
    APIs on it, then creates the task manager actor and a band subtask
    manager actor whose uid is generated for ``'numa-0'``.

    Yields:
        tuple: ``(pool, session_id, meta_api, storage_api, manager)``.

    After the consumer resumes the generator, ``MockStorageApi.cleanup`` is
    awaited while the pool context is still open.
    """
    if sys.platform == 'win32':
        # no explicit start method is passed on Windows
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    pool_labels = [None] + ['numa-0'] * 2
    pool = await mo.create_actor_pool(
        '127.0.0.1',
        n_process=2,
        labels=pool_labels,
        subprocess_start_method=start_method)

    async with pool:
        session_id = 'test_session'
        address = pool.external_address

        # mock service APIs
        await MockClusterAPI.create(address)
        await MockSessionAPI.create(address, session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, address)
        storage_api = await MockStorageApi.create(session_id, address)

        # task manager, created with the MainPool allocation strategy
        manager_uid = TaskManagerActor.gen_uid(session_id)
        manager = await mo.create_actor(
            TaskManagerActor, session_id,
            uid=manager_uid,
            address=address,
            allocate_strategy=MainPool())

        # band subtask manager for the 'numa-0' band
        band_manager_uid = BandSubtaskManagerActor.gen_uid('numa-0')
        await mo.create_actor(
            BandSubtaskManagerActor, address, 2,
            uid=band_manager_uid,
            address=address)

        yield pool, session_id, meta_api, storage_api, manager

        await MockStorageApi.cleanup(address)
Ejemplo n.º 6
0
async def test_main_actor_pool():
    """Drive a ``MainActorPool`` directly through its message-level API.

    The test builds an ``ActorPoolConfig`` with three entries, creates the
    pool from it and then, in order, checks: actor creation with the
    ``MainPool`` and ``IdleLabel`` strategies, ``has_actor`` / ``actor_ref``
    lookups, ``tell`` / ``send`` (including an error result), cancelling a
    running ``send``, destroying actors (through the pool and through a
    client connected to a sub pool), and the ``sync_config`` / ``get_config``
    control commands.  After leaving the ``async with`` block the pool must
    report ``stopped``.

    The steps share state (allocation order, the value accumulated by
    ``add``), so they must not be reordered.
    """
    config = ActorPoolConfig()
    my_label = 'computation'
    main_address = f'127.0.0.1:{get_next_port()}'
    # Three pool entries: index 0 labelled 'main', indices 1 and 2 sharing
    # `my_label`.  NOTE(review): the positional arguments of _add_pool_conf
    # look like (config, process index, label, internal address, external
    # address) — confirm against the helper.
    _add_pool_conf(config, 0, 'main', 'unixsocket:///0', main_address)
    _add_pool_conf(config,
                   1,
                   my_label,
                   'unixsocket:///1',
                   f'127.0.0.1:{get_next_port()}',
                   env={'my_env': '1'})
    _add_pool_conf(config, 2, my_label, 'unixsocket:///2',
                   f'127.0.0.1:{get_next_port()}')

    # the same strategy object is reused for every labelled allocation below
    strategy = IdleLabel(my_label, 'my_test')

    async with await MainActorPool.create({'actor_pool_config':
                                           config}) as pool:
        # MainPool strategy: the actor must report the main address
        create_actor_message = CreateActorMessage(new_message_id(), TestActor,
                                                  b'test', tuple(), dict(),
                                                  MainPool())
        message = await pool.create_actor(create_actor_message)
        actor_ref = message.result
        assert actor_ref.address == main_address

        # first labelled allocation: any address registered under my_label
        create_actor_message1 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test1', tuple(), dict(),
                                                   strategy)
        message1 = await pool.create_actor(create_actor_message1)
        actor_ref1 = message1.result
        assert actor_ref1.address in config.get_external_addresses(my_label)

        # second labelled allocation: must use a different address than the
        # first one
        create_actor_message2 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test2', tuple(), dict(),
                                                   strategy)
        message2 = await pool.create_actor(create_actor_message2)
        actor_ref2 = message2.result
        assert actor_ref2.address in config.get_external_addresses(my_label)
        assert actor_ref2.address != actor_ref1.address

        # third labelled allocation: both labelled entries are taken, so the
        # reply carries an error instead of a result
        create_actor_message3 = CreateActorMessage(new_message_id(), TestActor,
                                                   b'test3', tuple(), dict(),
                                                   strategy)
        message3 = await pool.create_actor(create_actor_message3)
        # no slot to allocate the same label
        assert isinstance(message3.error, NoIdleSlot)

        # The references below are built with the main address although
        # b'test2' was allocated through the label strategy; the pool is
        # expected to find the actor and return actor_ref2.
        has_actor_message = HasActorMessage(
            new_message_id(), create_actor_ref(main_address, b'test2'))
        assert (await pool.has_actor(has_actor_message)).result is True

        actor_ref_message = ActorRefMessage(
            new_message_id(), create_actor_ref(main_address, b'test2'))
        assert (await pool.actor_ref(actor_ref_message)).result == actor_ref2

        # tell
        tell_message = TellMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (2, ), dict()))
        message = await pool.tell(tell_message)
        assert message.result is None

        # send
        # the expected 6 is the earlier tell's 2 plus this call's 4
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (4, ), dict()))
        assert (await pool.send(send_message)).result == 6

        # test error message
        # type mismatch
        # the failure comes back as `result.error` rather than being raised
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('add', 0, ('3', ), dict()))
        result = await pool.send(send_message)
        assert isinstance(result.error, TypeError)

        # send and tell to main process
        tell_message = TellMessage(new_message_id(), actor_ref,
                                   ('add', 0, (2, ), dict()))
        message = await pool.tell(tell_message)
        assert message.result is None
        send_message = SendMessage(new_message_id(), actor_ref,
                                   ('add', 0, (4, ), dict()))
        assert (await pool.send(send_message)).result == 6

        # send and cancel
        # 'sleep' is called with 20 (presumably seconds — confirm against
        # TestActor.sleep); the cancel must complete within 3 seconds and the
        # pending send must return less than 3 seconds after `start`.
        send_message = SendMessage(new_message_id(), actor_ref1,
                                   ('sleep', 0, (20, ), dict()))
        result_task = asyncio.create_task(pool.send(send_message))
        start = time.time()
        cancel_message = CancelMessage(new_message_id(), actor_ref1.address,
                                       send_message.message_id)
        cancel_task = asyncio.create_task(pool.cancel(cancel_message))
        result = await asyncio.wait_for(cancel_task, 3)
        assert result.message_type == MessageType.result
        assert result.result is True
        result = await result_task
        assert time.time() - start < 3
        assert result.message_type == MessageType.result
        # NOTE(review): the expected value 7 depends on what TestActor.sleep
        # returns when cancelled, which is not visible here.
        assert result.result == 7

        # destroy
        destroy_actor_message = DestroyActorMessage(new_message_id(),
                                                    actor_ref1)
        message = await pool.destroy_actor(destroy_actor_message)
        assert message.result == actor_ref1.uid

        # telling a destroyed actor must produce an ErrorMessage
        tell_message = TellMessage(new_message_id(), actor_ref1,
                                   ('add', 0, (2, ), dict()))
        message = await pool.tell(tell_message)
        assert isinstance(message, ErrorMessage)

        # destroy via connecting to sub pool directly
        # NOTE(review): this connects to the last configured external address
        # and so assumes actor_ref2 lives there — confirm.
        async with await pool.router.get_client(
            config.get_external_addresses()[-1]) as client:
            destroy_actor_message = DestroyActorMessage(
                new_message_id(), actor_ref2)
            await client.send(destroy_actor_message)
            result = await client.recv()
            assert result.result == actor_ref2.uid

        # test sync config
        # add a fourth entry locally, then push the updated config to the pool
        config.add_pool_conf(3, 'sub', 'unixsocket:///3',
                             f'127.0.0.1:{get_next_port()}')
        sync_config_message = ControlMessage(new_message_id(),
                                             pool.external_address,
                                             ControlMessageType.sync_config,
                                             config)
        message = await pool.handle_control_command(sync_config_message)
        assert message.result is True

        # test get config
        # addressed to the second configured external address; the returned
        # config must match the local one, including the entry added above
        get_config_message = ControlMessage(new_message_id(),
                                            config.get_external_addresses()[1],
                                            ControlMessageType.get_config,
                                            None)
        message = await pool.handle_control_command(get_config_message)
        config2 = message.result
        assert config.as_dict() == config2.as_dict()

    # leaving the context manager must have stopped the pool
    assert pool.stopped
Ejemplo n.º 7
0
def test_main_pool():
    """``MainPool`` must allocate the address ``'127.0.0.1:1111'``.

    ``config`` is not defined inside this function; it is resolved from the
    enclosing scope.
    """
    allocated = MainPool().get_allocated_address(config, {})
    assert allocated == '127.0.0.1:1111'