Beispiel #1
0
def test_proxy_manager_lifecycle(shutdown_only):
    """
    Walk a single SpecificServer through its whole lifetime under a
    ProxyManager and check that:
    1. The SpecificServer is given the first free port.
    2. The channel to the SpecificServer becomes ready.
    3. The SpecificServer process exits when no client connects.
    4. The port is back in the ProxyManager's free list afterwards.
    """
    first_port, second_port = 45000, 45001
    client_id = "client1"

    cluster_info = ray.init()
    # Module/env knobs are tightened so the test does not have to wait long.
    proxier.CHECK_PROCESS_INTERVAL_S = 1
    os.environ["TIMEOUT_FOR_SPECIFIC_SERVER_S"] = "5"
    manager = proxier.ProxyManager(cluster_info["redis_address"],
                                   cluster_info["session_dir"])
    manager._free_ports = [first_port, second_port]

    manager.create_specific_server(client_id)
    started = manager.start_specific_server(client_id, JobConfig())
    assert started

    # The channel must become ready, i.e. it points at a live server.
    channel = manager.get_channel(client_id)
    ready_future = grpc.channel_ready_future(channel)
    ready_future.result(timeout=5)

    server = manager._get_server_for_client(client_id)
    assert server.port == first_port

    # Block (up to 10s) until the server process has terminated by itself.
    process_handle = server.process_handle_future.result()
    process_handle.process.wait(10)
    # Wait for reconcile loop
    time.sleep(2)

    assert len(manager._free_ports) == 2
    assert manager._get_unused_port() == second_port
def start_ray_and_proxy_manager(n_ports=2):
    """
    Start a local Ray instance and create a ProxyManager connected to it.

    Args:
        n_ports: Number of distinct ports to place in the ProxyManager's
            free-port list.

    Returns:
        A ``(pm, free_ports)`` tuple: the ProxyManager and a separate list
        holding the ports that were assigned to ``pm._free_ports``.

    Raises:
        ValueError: If ``n_ports`` is negative or larger than the 100 ports
            available in the candidate range (raised by ``random.sample``).
    """
    ray_instance = ray.init(_redis_password="******")
    agent_port = ray.worker.global_worker.node.metrics_agent_port
    pm = proxier.ProxyManager(ray_instance["redis_address"],
                              session_dir=ray_instance["session_dir"],
                              redis_password="******",
                              runtime_env_agent_port=agent_port)
    # Sample WITHOUT replacement: random.choices could return the same port
    # more than once, leaving duplicate entries in the free-port list.
    free_ports = random.sample(range(45000, 45100), k=n_ports)
    # Hand the manager its own copy so callers can compare against the
    # untouched original list.
    pm._free_ports = free_ports.copy()

    return pm, free_ports
Beispiel #3
0
def start_ray_and_proxy_manager(n_ports=2):
    """
    Start a local Ray instance and create a ProxyManager connected to it.

    Args:
        n_ports: Number of distinct ports to place in the ProxyManager's
            free-port list.

    Returns:
        A ``(pm, free_ports)`` tuple: the ProxyManager and a separate list
        holding the ports that were assigned to ``pm._free_ports``.

    Raises:
        ValueError: If ``n_ports`` is negative or larger than the 100 ports
            available in the candidate range (raised by ``random.sample``).
    """
    ray_instance = ray.init(_redis_password=REDIS_DEFAULT_PASSWORD)
    agent_port = ray._private.worker.global_worker.node.metrics_agent_port
    pm = proxier.ProxyManager(
        ray_instance["address"],
        session_dir=ray_instance["session_dir"],
        redis_password=REDIS_DEFAULT_PASSWORD,
        runtime_env_agent_port=agent_port,
    )
    # Sample WITHOUT replacement: random.choices could return the same port
    # more than once, leaving duplicate entries in the free-port list.
    free_ports = random.sample(range(45000, 45100), k=n_ports)
    # Hand the manager its own copy so callers can compare against the
    # untouched original list.
    pm._free_ports = free_ports.copy()

    return pm, free_ports
Beispiel #4
0
def test_proxy_manager_bad_startup(shutdown_only):
    """
    Check that a SpecificServer which fails to start is properly GC'd.

    The failure is provoked with a JobConfig whose runtime_env names a conda
    environment that does not exist. Afterwards the client must have no
    channel and both ports must be in the free list again.
    """
    first_port, second_port = 46000, 46001
    client_id = "client1"

    cluster_info = ray.init()
    # Module-level knobs are lowered to 1 before the manager is built.
    proxier.CHECK_PROCESS_INTERVAL_S = 1
    proxier.CHECK_CHANNEL_TIMEOUT_S = 1
    manager = proxier.ProxyManager(cluster_info["redis_address"],
                                   cluster_info["session_dir"])
    manager._free_ports = [first_port, second_port]

    broken_job_config = JobConfig(
        runtime_env={"conda": "conda-env-that-sadly-does-not-exist"})
    started = manager.start_specific_server(client_id, broken_job_config)
    assert not started

    # Wait for reconcile loop
    time.sleep(2)
    channel = manager.get_channel(client_id)
    assert channel is None

    assert len(manager._free_ports) == 2
Beispiel #5
0
def test_proxy_manager_lifecycle(shutdown_only):
    """
    Creates a ProxyManager and tests basic handling of the lifetime of a
    specific RayClient Server. It checks the following properties:
    1. The SpecificServer is created using the first port.
    2. The SpecificServer comes alive and has a log associated with it.
    3. The SpecificServer destructs itself when no client connects.
    4. The ProxyManager returns the port of the destructed SpecificServer.
    """
    ray_instance = ray.init()
    proxier.CHECK_PROCESS_INTERVAL_S = 1
    os.environ["TIMEOUT_FOR_SPECIFIC_SERVER_S"] = "5"
    pm = proxier.ProxyManager(ray_instance["redis_address"],
                              session_dir=ray_instance["session_dir"])
    # NOTE: We use different ports between runs because sometimes the port is
    # not released, introducing flakiness.
    # random.sample (not random.choices) guarantees the two ports differ;
    # identical ports would make the port assertions below meaningless.
    port_one, port_two = random.sample(range(45000, 45100), k=2)
    pm._free_ports = [port_one, port_two]
    client = "client1"

    pm.create_specific_server(client)
    assert pm.start_specific_server(client, JobConfig())
    # Channel should be ready and corresponding to an existing server
    grpc.channel_ready_future(pm.get_channel(client)).result(timeout=5)

    proc = pm._get_server_for_client(client)
    assert proc.port == port_one, f"Free Ports are: [{port_one}, {port_two}]"

    # At least one ray_client_server* log file name must mention the port.
    log_files_path = os.path.join(pm.node.get_session_dir_path(), "logs",
                                  "ray_client_server*")
    files = glob(log_files_path)
    assert any(str(port_one) in f for f in files)

    # Block (up to 10s) until the server process has exited.
    proc.process_handle_future.result().process.wait(10)
    # Wait for reconcile loop
    time.sleep(2)

    assert len(pm._free_ports) == 2
    assert pm._get_unused_port() == port_two
Beispiel #6
0
def test_proxy_manager_internal_kv(shutdown_only, with_specific_server):
    """
    Test that proxy manager can use internal kv with and without a
    SpecificServer and that once a SpecificServer is started up, it
    goes through it.

    Args:
        shutdown_only: Fixture; not referenced in the body.
        with_specific_server: When truthy, a SpecificServer is started for
            the client and the in-process internal-kv functions are patched
            to raise, so any direct use from the proxier fails the test.
    """

    ray_instance = ray.init(_redis_password="******")
    proxier.CHECK_PROCESS_INTERVAL_S = 1
    # The timeout has likely been set to 1 in an earlier test. Increase timeout
    # to wait for the channel to become ready.
    proxier.CHECK_CHANNEL_TIMEOUT_S = 5
    os.environ["TIMEOUT_FOR_SPECIFIC_SERVER_S"] = "5"
    pm = proxier.ProxyManager(ray_instance["redis_address"],
                              session_dir=ray_instance["session_dir"],
                              redis_password="******")
    # random.sample (not random.choices) guarantees two distinct ports, so
    # the free-port list never contains a duplicate entry.
    port_one, port_two = random.sample(range(45000, 45100), k=2)
    pm._free_ports = [port_one, port_two]
    client = "client1"

    task_servicer = proxier.RayletServicerProxy(None, pm)

    def make_internal_kv_calls():
        """Run a put/get/overwrite sequence on ``b"key"`` via the servicer."""
        # First put of a fresh key: nothing existed before.
        response = task_servicer.KVPut(
            ray_client_pb2.KVPutRequest(key=b"key", value=b"val"))
        assert isinstance(response, ray_client_pb2.KVPutResponse)
        assert not response.already_exists

        # Second put without overwrite: key exists, value must be kept.
        response = task_servicer.KVPut(
            ray_client_pb2.KVPutRequest(key=b"key", value=b"val2"))
        assert isinstance(response, ray_client_pb2.KVPutResponse)
        assert response.already_exists

        response = task_servicer.KVGet(ray_client_pb2.KVGetRequest(key=b"key"))
        assert isinstance(response, ray_client_pb2.KVGetResponse)
        assert response.value == b"val"

        # Put with overwrite=True: key exists and the value is replaced.
        response = task_servicer.KVPut(
            ray_client_pb2.KVPutRequest(key=b"key",
                                        value=b"val2",
                                        overwrite=True))
        assert isinstance(response, ray_client_pb2.KVPutResponse)
        assert response.already_exists

        response = task_servicer.KVGet(ray_client_pb2.KVGetRequest(key=b"key"))
        assert isinstance(response, ray_client_pb2.KVGetResponse)
        assert response.value == b"val2"

    # Every servicer call in this block resolves to the same client id.
    with patch("ray.util.client.server.proxier._get_client_id_from_context"
               ) as mock_get_client_id:
        mock_get_client_id.return_value = client

        if with_specific_server:
            pm.create_specific_server(client)
            assert pm.start_specific_server(client, JobConfig())
            channel = pm.get_channel(client)
            assert channel is not None
            task_servicer.Init(
                ray_client_pb2.InitRequest(
                    job_config=pickle.dumps(JobConfig())))

            # Mock out the internal kv calls in this process to raise an
            # exception if they're called. This verifies that we are not
            # making any calls in the proxier if there is a SpecificServer
            # started up.
            with patch(
                    "ray.experimental.internal_kv._internal_kv_put"
            ) as mock_put, patch(
                    "ray.experimental.internal_kv._internal_kv_get"
            ) as mock_get, patch(
                    "ray.experimental.internal_kv._internal_kv_initialized"
            ) as mock_initialized:
                mock_put.side_effect = Exception("This shouldn't be called!")
                mock_get.side_effect = Exception("This shouldn't be called!")
                mock_initialized.side_effect = Exception(
                    "This shouldn't be called!")
                make_internal_kv_calls()
        else:
            make_internal_kv_calls()