def test_fixed_number_proxies(ray_cluster):
    """Check that the "FixedNumber" location starts the requested proxy count.

    Builds a three-node cluster, verifies that omitting
    `fixed_number_replicas` is rejected, then starts Serve with two fixed
    replicas and checks how many HTTP proxies the controller reports.
    """
    cluster = ray_cluster

    # Three 4-CPU nodes; the driver connects via the first node's address.
    head = cluster.add_node(num_cpus=4)
    for _ in range(2):
        cluster.add_node(num_cpus=4)

    ray.init(head.address)
    assert len(ray.state.node_ids()) == 3

    # "FixedNumber" without `fixed_number_replicas` must fail validation.
    incomplete_options = {
        "location": "FixedNumber",
    }
    expected_error = "you must specify the `fixed_number_replicas` parameter."
    with pytest.raises(pydantic.ValidationError, match=expected_error):
        serve.start(http_options=incomplete_options)

    valid_options = {
        "port": new_port(),
        "location": "FixedNumber",
        "fixed_number_replicas": 2,
    }
    serve.start(http_options=valid_options)

    # Only the controller and two http proxy should be started.
    controller = _get_global_client()._controller
    proxies_by_node = ray.get(controller.get_http_proxies.remote())
    assert len(proxies_by_node) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
def deploy_replicas(num_replicas, max_batch_size):
    """Deploy the batched "echo" deployment and wait for it to be healthy.

    Args:
        num_replicas: Number of replicas requested for the deployment.
        max_batch_size: Maximum batch size passed to `serve.batch`.

    Raises:
        RuntimeError: Propagated from the final health wait if the
            deployment is still unhealthy once the grace period is over.
    """
    name = "echo"

    @serve.deployment(name=name, num_replicas=num_replicas)
    class Echo:
        @serve.batch(max_batch_size=max_batch_size)
        async def handle_batch(self, requests):
            return ["hi" for _ in range(len(requests))]

        async def __call__(self, request):
            return await self.handle_batch(request)

    # Set _blocking=False to allow for a custom extended grace period for the
    # health check, which is necessary to prevent this test from being flaky.
    Echo.deploy(_blocking=False)

    start = time.time()
    client = _get_global_client()

    # Wait for up to 10 minutes for the deployment to be healthy, allowing
    # time for any actors that crashed to restart.
    while time.time() - start < 10 * 60:
        try:
            # Raises RuntimeError if deployment enters the "UNHEALTHY" state.
            client._wait_for_deployment_healthy(name)
        except RuntimeError:
            # Back off briefly before polling again.
            time.sleep(1)
        else:
            # Healthy: stop polling instead of spinning until the grace
            # period runs out.
            break

    # If the deployment is still unhealthy at this point, allow RuntimeError
    # to be raised and let this test fail.
    client._wait_for_deployment_healthy(name)
def test_handle_cache_out_of_scope(serve_instance):
    """Check that re-fetching a handle reuses the cached one.

    After one handle to deployment "f" exists, fetching it again from a
    nested function 30 times must return the same object and leave the
    client's handle cache at the same size.
    """
    # https://github.com/ray-project/ray/issues/18980
    initial_num_cached = len(_get_global_client().handle_cache)

    @serve.deployment(name="f")
    def f():
        return "hi"

    f.deploy()
    handle = serve.get_deployment("f").get_handle()

    handle_cache = _get_global_client().handle_cache
    expected_cache_size = initial_num_cached + 1
    assert len(handle_cache) == expected_cache_size

    def sender_where_handle_goes_out_of_scope():
        # The handle fetched here is local and is dropped on return.
        scoped_handle = serve.get_deployment("f").get_handle()
        assert scoped_handle is handle
        assert ray.get(scoped_handle.remote()) == "hi"

    for _ in range(30):
        sender_where_handle_goes_out_of_scope()

    assert len(handle_cache) == expected_cache_size