Example #1
0
def test_fixed_number_proxies(ray_cluster):
    """Check that the "FixedNumber" HTTP location starts exactly two proxies.

    Builds a three-node cluster, verifies that omitting
    `fixed_number_replicas` is rejected with a validation error, then starts
    Serve with two fixed proxies and checks how many the controller reports.
    """
    cluster = ray_cluster

    # The first node added acts as the head; the remaining two are workers.
    head = cluster.add_node(num_cpus=4)
    for _ in range(2):
        cluster.add_node(num_cpus=4)

    ray.init(head.address)
    assert len(ray.state.node_ids()) == 3

    # "FixedNumber" without a replica count must fail option validation.
    incomplete_options = {
        "location": "FixedNumber",
    }
    with pytest.raises(
            pydantic.ValidationError,
            match="you must specify the `fixed_number_replicas` parameter."):
        serve.start(http_options=incomplete_options)

    complete_options = {
        "port": new_port(),
        "location": "FixedNumber",
        "fixed_number_replicas": 2
    }
    serve.start(http_options=complete_options)

    # Only the controller and two http proxies should be started.
    controller = _get_global_client()._controller
    proxies_by_node = ray.get(controller.get_http_proxies.remote())
    assert len(proxies_by_node) == 2

    # Tear down in reverse order of start-up: Serve, then Ray, then cluster.
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
def deploy_replicas(num_replicas, max_batch_size):
    """Deploy the batched "echo" deployment and wait until it is healthy.

    Args:
        num_replicas: Replica count passed to ``serve.deployment``.
        max_batch_size: Maximum batch size passed to ``serve.batch``.

    Raises:
        RuntimeError: If the deployment is still unhealthy once the
            10-minute grace period has elapsed (raised by the final
            health check below).
    """
    name = "echo"

    @serve.deployment(name=name, num_replicas=num_replicas)
    class Echo:
        @serve.batch(max_batch_size=max_batch_size)
        async def handle_batch(self, requests):
            # One "hi" response per request in the batch.
            return ["hi"] * len(requests)

        async def __call__(self, request):
            return await self.handle_batch(request)

    # Set _blocking=False to allow for a custom extended grace period for the
    # health check, which is necessary to prevent this test from being flaky.
    Echo.deploy(_blocking=False)

    start = time.time()
    client = _get_global_client()
    # Wait for up to 10 minutes for the deployment to be healthy, allowing
    # time for any actors that crashed to restart.
    while time.time() - start < 10 * 60:
        try:
            # Raises RuntimeError if deployment enters the "UNHEALTHY" state.
            client._wait_for_deployment_healthy(name)
        except RuntimeError:
            # Still unhealthy: back off briefly, then poll again.
            time.sleep(1)
        else:
            # Healthy: stop polling instead of spinning until the deadline.
            break

    # If the deployment is still unhealthy at this point, allow RuntimeError
    # to be raised and let this test fail.
    client._wait_for_deployment_healthy(name)
Example #3
0
def test_handle_cache_out_of_scope(serve_instance):
    """Check that repeated handle lookups reuse a single cached handle.

    Regression test for https://github.com/ray-project/ray/issues/18980:
    fetching the handle for "f" inside a function 30 times must return the
    same handle object each time and must not grow the client's handle cache.
    """
    baseline = len(_get_global_client().handle_cache)

    @serve.deployment(name="f")
    def f():
        return "hi"

    f.deploy()
    handle = serve.get_deployment("f").get_handle()

    # Fetching the first handle adds exactly one cache entry.
    handle_cache = _get_global_client().handle_cache
    expected_size = baseline + 1
    assert len(handle_cache) == expected_size

    def sender_where_handle_goes_out_of_scope():
        # The local handle is dropped when this function returns.
        scoped_handle = serve.get_deployment("f").get_handle()
        assert scoped_handle is handle
        assert ray.get(scoped_handle.remote()) == "hi"

    for _ in range(30):
        sender_where_handle_goes_out_of_scope()

    # No additional cache entries were created by the repeated lookups.
    assert len(handle_cache) == expected_size