def test_deploy_handle_validation(serve_instance):
    """Exercise method lookup on handles, with and without ``missing_ok``.

    A handle for an existing deployment must resolve ``b`` both through
    ``options(method_name=...)`` and through attribute access, and must raise
    ``AttributeError`` for the undefined method ``c``. A handle created with
    ``missing_ok=True`` is expected to reject attribute-style access even
    after the deployment appears, while ``options`` keeps working.
    """

    class A:
        def b(self, *args):
            return "hello"

    serve_instance.deploy("f", A)
    deployed_handle = serve.get_handle("f")

    # Legacy path: pick the method through ``options``.
    legacy_ref = deployed_handle.options(method_name="b").remote()
    assert ray.get(legacy_ref) == "hello"
    # New path: pick the method through attribute access.
    attribute_ref = deployed_handle.b.remote()
    assert ray.get(attribute_ref) == "hello"
    with pytest.raises(AttributeError):
        deployed_handle.c.remote()

    # ``missing_ok`` case: the handle is requested before "g" is deployed.
    lazy_handle = serve.get_handle("g", missing_ok=True)
    with pytest.raises(AttributeError):
        lazy_handle.b.remote()

    serve_instance.deploy("g", A)
    # The ``options`` path still works once "g" exists.
    late_ref = lazy_handle.options(method_name="b").remote()
    assert ray.get(late_ref) == "hello"
    # Attribute access stays unavailable because of the ``missing_ok`` flag.
    with pytest.raises(AttributeError):
        lazy_handle.b.remote()
def test_delete_endpoint(serve_instance, route):
    """Check that a deleted endpoint's name and route can be reused.

    Also checks that deleting an endpoint leaves its backend usable. When
    ``route`` is not None the endpoint is queried over HTTP; otherwise it is
    queried through a handle.
    """

    def function():
        return "hello"

    backend_name = "delete-endpoint:v1"
    serve.create_backend(backend_name, function)

    endpoint_name = "delete_endpoint" + str(route)

    def recreate_and_query():
        # Create the endpoint again and confirm it serves the backend.
        serve.create_endpoint(endpoint_name, backend=backend_name, route=route)
        if route is None:
            handle = serve.get_handle(endpoint_name)
            assert ray.get(handle.remote()) == "hello"
        else:
            response = requests.get("http://127.0.0.1:8000/delete-endpoint")
            assert response.text == "hello"

    serve.create_endpoint(endpoint_name, backend=backend_name, route=route)
    serve.delete_endpoint(endpoint_name)

    # A deleted endpoint name and route must be reusable.
    recreate_and_query()

    # Deleting the endpoint must not delete the backend behind it.
    serve.delete_endpoint(endpoint_name)
    recreate_and_query()
def test_handle_option_chaining(serve_instance):
    """Check that ``options`` calls do not leak between handles.

    Regression test for:
    https://github.com/ray-project/ray/issues/12802
    https://github.com/ray-project/ray/issues/12798
    """

    class MultiMethod:
        def method_a(self, _):
            return "method_a"

        def method_b(self, _):
            return "method_b"

        def __call__(self, _):
            return "__call__"

    serve.create_backend("m", MultiMethod)
    serve.create_endpoint("m", backend="m")

    # Each ``get_handle`` call is expected to hand back a clean handle.
    handle_a = serve.get_handle("m").options(method_name="method_a")
    handle_default = serve.get_handle("m")
    # Chaining ``options`` on an already-configured handle should override.
    handle_b = handle_a.options(method_name="method_b")

    assert ray.get(handle_a.remote()) == "method_a"
    assert ray.get(handle_default.remote()) == "__call__"
    assert ray.get(handle_b.remote()) == "method_b"
def test_imported_backend(serve_instance):
    """Create backends from import-path strings and call them via handles."""
    initial_config = BackendConfig(user_config="config")
    serve.create_backend(
        "imported",
        "ray.serve.utils.MockImportedBackend",
        "input_arg",
        config=initial_config)
    serve.create_endpoint("imported", backend="imported")

    # Basic sanity check.
    class_handle = serve.get_handle("imported")
    expected = {"arg": "input_arg", "config": "config"}
    assert ray.get(class_handle.remote()) == expected

    # Updating the backend config should be reflected in the response.
    updated_config = BackendConfig(user_config="new_config")
    serve.update_backend_config("imported", updated_config)
    expected = {"arg": "input_arg", "config": "new_config"}
    assert ray.get(class_handle.remote()) == expected

    # Other methods of the imported class should be callable too.
    other_handle = class_handle.options(method_name="other_method")
    assert ray.get(other_handle.remote("hello")) == "hello"

    # Import-path strings that point at plain functions work as well.
    serve.create_backend("imported_func",
                         "ray.serve.utils.mock_imported_function")
    serve.create_endpoint("imported_func", backend="imported_func")
    func_handle = serve.get_handle("imported_func")
    assert ray.get(func_handle.remote("hello")) == "hello"
def update_routes(self, endpoints: Dict[EndpointTag, EndpointInfo]) -> None:
    """Rebuild the routing state from the given endpoint table.

    Stores a handle in ``self.handles`` for every endpoint that does not have
    one yet, drops handles whose endpoint is absent from ``endpoints``, and
    replaces ``self.sorted_routes`` and ``self.route_info``.

    Args:
        endpoints: Mapping from endpoint tag to its ``EndpointInfo``; the
            ``route`` and ``http_methods`` attributes of each info are read.
    """
    logger.debug(f"Got updated endpoints: {endpoints}.")

    # Start by assuming every known handle is stale; endpoints that are
    # still present get removed from this set as they are visited.
    stale_endpoints = set(self.handles.keys())
    route_info = {}
    routes = []
    for endpoint, info in endpoints.items():
        # With no user-specified route, fall back to "/<endpoint>".
        route = f"/{endpoint}" if info.route is None else info.route
        routes.append(route)
        route_info[route] = (endpoint, info.http_methods)

        if endpoint not in self.handles:
            self.handles[endpoint] = serve.get_handle(
                endpoint,
                sync=False,
                missing_ok=True,
                _internal_pickled_http_request=True,
            )
        else:
            stale_endpoints.remove(endpoint)

    # Drop the handles that no longer correspond to any endpoint.
    for endpoint in stale_endpoints:
        del self.handles[endpoint]

    # Longest routes first so that longest-prefix matching can take the
    # first hit.
    self.sorted_routes = sorted(routes, key=len, reverse=True)
    self.route_info = route_info
def test_new_driver(serve_instance):
    """Register a route from a separate driver process and call it here.

    The driver script is written to a temporary file and run with ``python``.
    It must exit with status 0 within 10 seconds, after which the "driver"
    endpoint must be reachable through a handle from this process.
    """
    script = """
import ray
ray.init(address="auto")

from ray import serve
serve.init()

@serve.route("/driver")
def driver(flask_request):
    return "OK!"
"""

    with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
        path = f.name
        f.write(script)

    # The file was created with delete=False, so remove it on every exit
    # path, including a failed assertion or a subprocess timeout.
    try:
        proc = subprocess.Popen(["python", path])
        try:
            return_code = proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # Popen.wait does not stop the child on timeout; reap it here so
            # it does not outlive the test, then report the timeout.
            proc.kill()
            proc.wait()
            raise
        assert return_code == 0

        handle = serve.get_handle("driver")
        assert ray.get(handle.remote()) == "OK!"
    finally:
        os.remove(path)
def test_batching(serve_instance):
    """Check that an ``accept_batch`` backend groups concurrent requests."""

    class BatchingExample:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            self.count += 1
            batch_size = serve.context.batch_size
            return [self.count] * batch_size

    # Configure the maximum batch size on the backend.
    serve.create_backend(
        "counter:v11", BatchingExample, config={"max_batch_size": 5})
    serve.create_endpoint(
        "counter1", backend="counter:v11", route="/increment2")

    # Poll the routing table until /increment2 shows up.
    routes_url = "http://127.0.0.1:8000/-/routes"
    while "/increment2" not in requests.get(routes_url).json():
        time.sleep(0.2)

    handle = serve.get_handle("counter1")
    future_list = [handle.remote(temp=1) for _ in range(20)]

    counter_result = ray.get(future_list)
    # The counter only advances once per batch. If at least one __call__
    # received a batch larger than one, the highest value seen stays
    # below the number of requests (20).
    assert max(counter_result) < 20
def test_parallel_start(serve_instance):
    """Check that multiple replicas of a backend are started in parallel.

    Each replica's constructor blocks on a shared barrier that is only
    released once two waiters have arrived, so the first replica cannot
    finish initializing unless the second one has been started as well.
    """

    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters != self.release_on:
                # Not everyone has arrived yet; block until released.
                await self.event.wait()
            else:
                # Last expected waiter: release everybody.
                self.event.set()

    start_barrier = Barrier.remote(release_on=2)

    class LongStartingServable:
        def __init__(self):
            ray.get(start_barrier.wait.remote(), timeout=10)

        def __call__(self, _):
            return "Ready"

    serve.create_backend(
        "p:v0", LongStartingServable, config={"num_replicas": 2})
    serve.create_endpoint("test-parallel", backend="p:v0")

    parallel_handle = serve.get_handle("test-parallel")
    ray.get(parallel_handle.remote(), timeout=10)
def test_batching(serve_instance):
    """Check that a ``serve.batch`` method groups concurrent requests."""

    class BatchingExample:
        def __init__(self):
            self.count = 0

        @serve.batch(max_batch_size=5, batch_wait_timeout_s=1)
        async def handle_batch(self, requests):
            self.count += 1
            batch_size = len(requests)
            return [self.count] * batch_size

        async def __call__(self, request):
            return await self.handle_batch(request)

    # The maximum batch size is set on the ``serve.batch`` decorator above.
    serve.create_backend("counter:v11", BatchingExample)
    serve.create_endpoint(
        "counter1", backend="counter:v11", route="/increment2")

    handle = serve.get_handle("counter1")
    future_list = [handle.remote(temp=1) for _ in range(20)]

    counter_result = ray.get(future_list)
    # The counter only advances once per batch. If at least one batch held
    # more than one request, the highest value seen stays below the number
    # of requests (20).
    assert max(counter_result) < 20
def test_backend_user_config(serve_instance):
    """Check that ``user_config`` reaches replicas across config updates."""

    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    initial_config = BackendConfig(
        num_replicas=2, user_config={"count": 123, "b": 2})
    serve.create_backend("counter", Counter, config=initial_config)
    serve.create_endpoint("counter", backend="counter")
    handle = serve.get_handle("counter")

    def check(val, num_replicas):
        # Send 100 requests; every reply must carry the expected count, and
        # the distinct pids seen must match the expected replica count.
        pids_seen = set()
        for _ in range(100):
            count, pid = ray.get(handle.remote())
            if str(count) != val:
                return False
            pids_seen.add(pid)
        return len(pids_seen) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    # Changing only the replica count: replies are expected to keep "123".
    serve.update_backend_config("counter", BackendConfig(num_replicas=3))
    wait_for_condition(lambda: check("123", 3))

    # Changing only the user config: the replica count is expected to stay 3.
    new_user_config = BackendConfig(user_config={"count": 456})
    serve.update_backend_config("counter", new_user_config)
    wait_for_condition(lambda: check("456", 3))
def test_new_driver(serve_instance):
    """Create an endpoint from a separate driver process and call it here.

    The driver script connects to the running cluster using the redis
    address of the current global node, creates the "driver" endpoint and
    backend, and routes all traffic to that backend. It must exit with
    status 0 within 10 seconds, after which the endpoint must be reachable
    through a handle from this process.
    """
    script = """
import ray
ray.init(address="{}")

from ray import serve
serve.init()

def driver(flask_request):
    return "OK!"

serve.create_endpoint("driver", "/driver")
serve.create_backend("driver", driver)
serve.set_traffic("driver", {{"driver": 1.0}})
""".format(ray.worker._global_node._redis_address)

    with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
        path = f.name
        f.write(script)

    # The file was created with delete=False, so remove it on every exit
    # path, including a failed assertion or a subprocess timeout.
    try:
        proc = subprocess.Popen(["python", path])
        try:
            return_code = proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # Popen.wait does not stop the child on timeout; reap it here so
            # it does not outlive the test, then report the timeout.
            proc.kill()
            proc.wait()
            raise
        assert return_code == 0

        handle = serve.get_handle("driver")
        assert ray.get(handle.remote()) == "OK!"
    finally:
        os.remove(path)
async def __call__(self, scope, receive, send):
    """Forward one HTTP request to the endpoint's handle and send the reply.

    The request body is read via ``self.receive_http_body``. The headers
    ``X-SERVE-CALL-METHOD`` and ``X-SERVE-SHARD-KEY`` (looked up lowercased)
    select the method name and shard key, falling back to ``DEFAULT.VALUE``.
    A ``RayTaskError`` result is reported as a 500 response, a starlette
    ``Response`` result is sent as-is, and anything else is wrapped in a
    ``Response``.
    """
    http_body_bytes = await self.receive_http_body(scope, receive, send)

    headers = {}
    for raw_key, raw_value in scope["headers"]:
        headers[raw_key.decode()] = raw_value.decode()

    # The handle is created on first use.
    if self.handle is None:
        self.handle = serve.get_handle(self.endpoint_tag, sync=False)

    method_header = "X-SERVE-CALL-METHOD".lower()
    shard_header = "X-SERVE-SHARD-KEY".lower()
    configured_handle = self.handle.options(
        method_name=headers.get(method_header, DEFAULT.VALUE),
        shard_key=headers.get(shard_header, DEFAULT.VALUE),
        http_method=scope["method"].upper(),
        http_headers=headers)
    starlette_request = build_starlette_request(scope, http_body_bytes)
    object_ref = await configured_handle.remote(starlette_request)

    result = await object_ref

    if isinstance(result, RayTaskError):
        error_message = "Task Error. Traceback: {}.".format(result)
        error_response = Response(error_message, status_code=500)
        await error_response.send(scope, receive, send)
        return

    if isinstance(result, starlette.responses.Response):
        await result(scope, receive, send)
        return

    await Response(result).send(scope, receive, send)
async def test_args_kwargs(serve_instance, sync, serve_request):
    """Check how positional and keyword arguments reach a deployment.

    With ``serve_request`` set, the deployment expects a request object as
    its first argument, with the positional value as its body and the
    keyword values as its query params. Otherwise it expects the raw args
    and kwargs. ``sync`` selects a sync or an async handle.
    """

    @serve.deployment
    async def f(*args, **kwargs):
        if not serve_request:
            assert args[0] == "hi"
            assert kwargs["kwarg1"] == 1
            assert kwargs["kwarg2"] == "2"
        else:
            req = args[0]
            assert await req.body() == "hi"
            assert req.query_params["kwarg1"] == 1
            assert req.query_params["kwarg2"] == "2"

    f.deploy()

    handle = serve.get_handle(
        "f", sync=sync, _internal_use_serve_request=serve_request)

    def call():
        return handle.remote("hi", kwarg1=1, kwarg2="2")

    # An async handle returns an awaitable that resolves to the object ref.
    obj_ref = call() if sync else await call()

    ray.get(obj_ref)
def test_no_route(serve_instance):
    """Call an endpoint that was created without a route via its handle."""

    def func(_, i=1):
        return 1

    serve.create_backend("backend:1", func)
    serve.create_endpoint("noroute-endpoint", backend="backend:1")

    noroute_handle = serve.get_handle("noroute-endpoint")
    assert ray.get(noroute_handle.remote(i=1)) == 1
def do_request(shard_key):
    """Send one request tagged with ``shard_key`` and return its result.

    When the enclosing ``route`` is None the request goes through a handle
    for "endpoint"; otherwise it goes over HTTP with the shard key passed in
    the ``X-SERVE-SHARD-KEY`` header, and the response text is returned.
    """
    if route is None:
        handle = serve.get_handle("endpoint").options(shard_key=shard_key)
        return ray.get(handle.options(shard_key=shard_key).remote())

    url = "http://127.0.0.1:8000" + route
    headers = {"X-SERVE-SHARD-KEY": shard_key}
    return requests.get(url, headers=headers).text
def __init__(self, endpoint_tag: EndpointTag, path_prefix: str):
    """Store the endpoint tag and path prefix and fetch the endpoint handle.

    Args:
        endpoint_tag: Tag of the endpoint; passed to ``serve.get_handle``.
        path_prefix: Stored on the instance as ``self.path_prefix``.
    """
    self.path_prefix = path_prefix
    self.endpoint_tag = endpoint_tag
    # Async handle; ``missing_ok=True`` is passed so the lookup is allowed
    # to proceed for an endpoint serve does not know about.
    self.handle = serve.get_handle(
        endpoint_tag,
        sync=False,
        missing_ok=True,
        _internal_pickled_http_request=True,
    )
def test_repeated_get_handle_cached(serve_instance):
    """Check that 100 ``get_handle`` calls collapse to one set element."""

    def f(_):
        return ""

    serve.create_backend("m", f)
    serve.create_endpoint("m", backend="m")

    distinct_handles = set()
    for _ in range(100):
        distinct_handles.add(serve.get_handle("m"))

    assert len(distinct_handles) == 1
def call(block=False):
    """Query the endpoint and return the first two ``|``-separated fields.

    Uses a handle when the enclosing ``use_handle`` is truthy (passing
    ``block`` as a string), and an HTTP GET with ``block`` as a query
    parameter otherwise.
    """
    if not use_handle:
        response = requests.get(
            f"http://localhost:8000/{name}", params={"block": block})
        ret = response.text
    else:
        ret = ray.get(serve.get_handle(name).remote(block=str(block)))

    fields = ret.split("|")
    return fields[0], fields[1]
def test_no_route(serve_instance):
    """Call a route-less endpoint whose traffic is set with ``set_traffic``."""

    def func(_, i=1):
        return 1

    serve.create_endpoint("noroute-endpoint")
    serve.create_backend(func, "backend:1")
    # Send all of the endpoint's traffic to the single backend.
    serve.set_traffic("noroute-endpoint", {"backend:1": 1.0})

    noroute_handle = serve.get_handle("noroute-endpoint")
    assert ray.get(noroute_handle.remote(i=1)) == 1
def test_connect(detached, ray_shutdown):
    """Check that serve can be used from inside a running backend.

    Covers both detached and non-detached instances: the backend body itself
    creates another backend, which must then show up in ``list_backends``.
    """
    ray.init(num_cpus=16)
    serve.start(detached=detached)

    def connect_in_backend(_):
        serve.create_backend("backend-ception", connect_in_backend)

    serve.create_backend("connect_in_backend", connect_in_backend)
    serve.create_endpoint("endpoint", backend="connect_in_backend")

    endpoint_handle = serve.get_handle("endpoint")
    ray.get(endpoint_handle.remote())

    backend_names = serve.list_backends().keys()
    assert "backend-ception" in backend_names
def test_sync_handle_serializable(serve_instance):
    """Pass a sync handle into a Ray task and call the endpoint from there."""

    def f(_):
        return "hello"

    serve.create_backend("f", f)
    serve.create_endpoint("f", backend="f")

    @ray.remote
    def task(handle):
        return ray.get(handle.remote())

    sync_handle = serve.get_handle("f", sync=True)
    # Handing the handle to a remote task requires it to be serialized.
    assert ray.get(task.remote(sync_handle)) == "hello"
def test_detached_deployment(ray_cluster):
    """Check that a detached serve instance is usable from a second job.

    Regression test for https://github.com/ray-project/ray/issues/11437
    """
    head_node = ray_cluster.add_node(node_ip_address="127.0.0.1", num_cpus=6)

    # First job: start a detached instance and run a simple endpoint on it.
    ray.init(head_node.address)
    first_job_id = ray.get_runtime_context().job_id
    serve.start(detached=True)
    serve.create_backend("f", lambda _: "hello")
    serve.create_endpoint("f", backend="f")
    first_handle = serve.get_handle("f")
    assert ray.get(first_handle.remote()) == "hello"

    # Clear the cached client before disconnecting from the cluster.
    serve.api._global_client = None
    ray.shutdown()

    # Second job: creating new backends must still work.
    ray.init(head_node.address)
    second_job_id = ray.get_runtime_context().job_id
    assert second_job_id != first_job_id

    serve.create_backend("g", lambda _: "world")
    serve.create_endpoint("g", backend="g")
    second_handle = serve.get_handle("g")
    assert ray.get(second_handle.remote()) == "world"
async def test_serve_handle(ray_start_regular_shared):
    """Use a serve handle through the object yielded by the client server.

    The handle's result is fetched both with ``ray.get`` and with ``await``.
    """
    # NOTE: ``ray`` below is the object yielded by the context manager.
    with ray_start_client_server() as ray:
        from ray import serve

        _explicitly_enable_client_mode()
        serve.start(detached=True)

        def hello(request):
            return "hello"

        serve.create_backend(
            "my_backend", hello, config={"num_replicas": 1})
        serve.create_endpoint(
            "my_endpoint", backend="my_backend", route="/hello")

        endpoint_handle = serve.get_handle("my_endpoint")
        assert ray.get(endpoint_handle.remote()) == "hello"
        assert await endpoint_handle.remote() == "hello"
def test_serve_graceful_shutdown(serve_instance):
    """Check that queued queries are drained when a backend is deleted.

    Ten queries are sent to a backend that blocks on a signal actor. The
    endpoint and backend are then deleted from a separate task. The queries
    must stay pending until the signal is sent, after which both the queries
    and the blocking delete must complete.
    """
    signal = SignalActor.remote()

    class WaitBackend:
        @serve.accept_batch
        async def __call__(self, requests):
            signal_actor = await requests[0].body()
            await signal_actor.wait.remote()
            return [""] * len(requests)

    wait_config = BackendConfig(
        # Make sure queries can be queued up on the replica side.
        max_concurrent_queries=10,
        max_batch_size=1,
        experimental_graceful_shutdown_wait_loop_s=0.5,
        experimental_graceful_shutdown_timeout_s=1000,
    )
    serve.create_backend("wait", WaitBackend, config=wait_config)
    serve.create_endpoint("wait", backend="wait")
    handle = serve.get_handle("wait")
    pending_refs = [handle.remote(signal) for _ in range(10)]

    # Give the queries time to be enqueued; none of them can finish yet.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(pending_refs, timeout=1)

    @ray.remote(num_cpus=0)
    def do_blocking_delete():
        serve.delete_endpoint("wait")
        serve.delete_backend("wait")

    # Delete the backend now; this should trigger the shutdown sequence.
    delete_ref = do_blocking_delete.remote()

    # The queries are enqueued but not executed because the signal actor
    # is still blocking them.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(pending_refs, timeout=1)

    signal.send.remote()

    # Every query should now drain and finish without error.
    ray.get(pending_refs)
    # The blocking delete should finish as well.
    ray.get(delete_ref)
def test_batching_exception(serve_instance):
    """Check that a batched backend returning a non-list fails the query."""

    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, requests):
            # Returns an int rather than a list with one item per request.
            return len(requests)

    # Set the max batch size.
    batch_config = BackendConfig(max_batch_size=5)
    serve.create_backend("exception:v1", NoListReturned, config=batch_config)
    serve.create_endpoint("exception-test", backend="exception:v1")

    exception_handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(exception_handle.remote(temp=1))
def test_serve_forceful_shutdown(serve_instance):
    """Check that an in-flight query fails once its backend is deleted.

    The backend never returns and is configured with a one second graceful
    shutdown timeout; after the endpoint and backend are deleted, fetching
    the outstanding result must raise ``RayActorError``.
    """

    def sleeper(_):
        while True:
            time.sleep(1000)

    sleeper_config = BackendConfig(experimental_graceful_shutdown_timeout_s=1)
    serve.create_backend("sleeper", sleeper, config=sleeper_config)
    serve.create_endpoint("sleeper", backend="sleeper")

    sleeper_handle = serve.get_handle("sleeper")
    inflight_ref = sleeper_handle.remote()

    serve.delete_endpoint("sleeper")
    serve.delete_backend("sleeper")

    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(inflight_ref)
def test_no_http(ray_shutdown):
    """Check the ``serve.start`` options expected to disable the HTTP server.

    For each option set, the controller must be the only live actor, it must
    report no HTTP proxies, and handles must still work.
    """
    # All of the following are expected to have the same effect.
    options = [
        {"http_host": None},
        {"http_options": {"host": None}},
        {"http_options": {"location": None}},
        {"http_options": {"location": "NoServer"}},
    ]

    ray.init(num_cpus=16)
    for position, option in enumerate(options, start=1):
        print(f"[{position}/{len(options)}] Running with {option}")
        serve.start(**option)

        # Only the controller actor should be alive.
        live_actors = []
        for actor in ray.actors().values():
            if actor["State"] == ray.gcs_utils.ActorTableData.ALIVE:
                live_actors.append(actor)
        assert len(live_actors) == 1

        controller = serve.api._global_client._controller
        proxies = ray.get(controller.get_http_proxies.remote())
        assert len(proxies) == 0

        # Handles must keep working without the HTTP server.
        def hello(*args):
            return "hello"

        serve.create_backend("backend", hello)
        serve.create_endpoint("endpoint", backend="backend")

        endpoint_handle = serve.get_handle("endpoint")
        assert ray.get(endpoint_handle.remote()) == "hello"
        serve.shutdown()
def test_call_method(serve_instance):
    """Call a non-default backend method over HTTP and through a handle.

    Over HTTP the method is selected with the ``X-SERVE-CALL-METHOD``
    header; through a handle it is selected with
    ``options(method_name=...)``.
    """

    class CallMethod:
        def method(self, request):
            return "hello"

    serve.create_backend("backend", CallMethod)
    serve.create_endpoint("endpoint", backend="backend", route="/api")

    # Test HTTP path.
    resp = requests.get(
        "http://127.0.0.1:8000/api",
        timeout=1,
        headers={"X-SERVE-CALL-METHOD": "method"})
    assert resp.text == "hello"

    # Test serve handle path. The method name is passed by keyword, as in
    # the other handle tests, rather than relying on the positional order
    # of ``options``.
    handle = serve.get_handle("endpoint")
    assert ray.get(handle.options(method_name="method").remote()) == "hello"
def test_new_driver(serve_instance):
    """Create an endpoint from a separate driver and call it from this one.

    The driver script is run via ``ray.test_utils.run_string_as_driver`` and
    connects using the redis address of the current global node.
    """
    script_template = """
import ray
ray.init(address="{}")

from ray import serve

def driver(starlette_request):
    return "OK!"

serve.create_backend("driver", driver)
serve.create_endpoint("driver", backend="driver", route="/driver")
"""
    redis_address = ray.worker._global_node._redis_address
    script = script_template.format(redis_address)
    ray.test_utils.run_string_as_driver(script)

    driver_handle = serve.get_handle("driver")
    assert ray.get(driver_handle.remote()) == "OK!"
def test_no_route(serve_instance):
    """Check route-table listing and handle calls for a route-less endpoint.

    The endpoint must be listed under ``NO_ROUTE_KEY`` only when
    ``include_headless=True`` is passed, and must be callable through a
    handle once a backend is linked to it.
    """

    def func(_, i=1):
        return 1

    serve.create_endpoint("noroute-endpoint", blocking=True)
    route_table = serve.api._get_global_state().route_table

    # Route-less endpoints show up only when headless ones are requested.
    with_headless = route_table.list_service(include_headless=True)
    assert with_headless[NO_ROUTE_KEY] == ["noroute-endpoint"]

    without_headless_result = route_table.list_service()
    assert NO_ROUTE_KEY not in without_headless_result

    serve.create_backend(func, "backend:1")
    serve.link("noroute-endpoint", "backend:1")

    noroute_handle = serve.get_handle("noroute-endpoint")
    assert ray.get(noroute_handle.remote(i=1)) == 1