def test_init_args(serve_instance):
    """Check how constructor arguments behave across redeployments.

    The deployment is first deployed with ``"1"``. Redeploying a handle
    fetched by name with no arguments must still return ``"1"`` from a new
    process, and redeploying with ``"2"`` must return ``"2"`` from yet
    another process.
    """
    name = "test"

    @serve.deployment(name=name)
    class D:
        def __init__(self, val):
            self._val = val

        def __call__(self, *arg):
            return self._val, os.getpid()

    D.deploy("1")
    first_val, first_pid = ray.get(D.get_handle().remote())
    assert first_val == "1"

    # Drop the local reference; everything below goes through the by-name API.
    del D

    # Redeploy without arguments.
    D2 = serve.get_deployment(name=name)
    D2.deploy()
    second_val, second_pid = ray.get(D2.get_handle().remote())
    assert second_val == "1"
    assert second_pid != first_pid

    # Redeploy with a new constructor argument.
    D2 = serve.get_deployment(name=name)
    D2.deploy("2")
    third_val, third_pid = ray.get(D2.get_handle().remote())
    assert third_val == "2"
    assert third_pid != second_pid
def delete_deployment(self, name):
    """Delete the Serve deployment called ``name`` if it is listed.

    Args:
        name: Name of the deployment to remove.

    Returns:
        None. One info message is logged: either that the model was
        deleted, or that no model with that name exists.
    """
    exists = any(name == d["name"] for d in self.list_deployments())
    if not exists:
        logger.info("Model with name {} does not exist.".format(name))
        return

    serve.get_deployment(name).delete()
    # Return implicitly here so the "does not exist" message is never
    # logged after a successful delete.
    logger.info("Deleted model with name: {}".format(name))
def update_deployment(self, name, model_uri=None, flavor=None, config=None):
    """Update an existing deployment's configuration or replace its model.

    Args:
        name: Name of the deployment to update.
        model_uri: When ``None`` only the configuration is updated in place;
            otherwise the deployment is deleted and recreated from this URI.
        flavor: Passed through to ``create_deployment`` when recreating.
        config: Optional dict of deployment options. ``None`` is treated as
            "no option changes".

    Returns:
        A dict with the keys ``"name"``, ``"config"`` and ``"flavor"``
        (the flavor is always reported as ``"python_function"``).
    """
    if model_uri is None:
        # ``config`` defaults to None, and ``**None`` raises TypeError, so
        # fall back to an empty mapping.
        options = config or {}
        serve.get_deployment(name).options(**options).deploy()
    else:
        self.delete_deployment(name)
        self.create_deployment(name, model_uri, flavor, config)
    return {"name": name, "config": config, "flavor": "python_function"}
def test_get_after_delete(self, serve_instance):
    """A deployment deleted through a by-name handle can no longer be fetched."""

    @serve.deployment(version="1")
    def d(*args):
        return "1", os.getpid()

    d.deploy()
    del d

    fetched = serve.get_deployment("d")
    fetched.delete()
    del fetched

    # Looking the name up again must now fail.
    with pytest.raises(KeyError):
        serve.get_deployment("d")
def create_deployment(self):
    """Deploy a handler under a fresh random name and record that name.

    If ``self.deployments`` already holds ``self.max_deployments`` names, the
    last recorded deployment is popped and deleted first.
    """
    at_capacity = len(self.deployments) == self.max_deployments
    if at_capacity:
        evicted_name = self.deployments.pop()
        serve.get_deployment(evicted_name).delete()

    # Ten random ASCII letters form the new deployment name.
    letters = [random.choice(string.ascii_letters) for _ in range(10)]
    new_name = "".join(letters)

    @serve.deployment(name=new_name)
    def handler(self, *args):
        return new_name

    handler.deploy()
    self.deployments.append(new_name)
def remote(self, *args, **kwargs):
    """Forward a call to the deployment handle, creating it on first use.

    The handle is looked up by ``self.deployment_name`` and bound to the
    method named in ``self.handle_options`` the first time it is needed;
    later calls reuse the stored handle.
    """
    if not self.handle:
        base_handle = serve.get_deployment(self.deployment_name).get_handle()
        self.handle = base_handle.options(
            method_name=self.handle_options.method_name
        )
    # TODO (jiaodong): Polish async handles later for serve pipeline
    return self.handle.remote(*args, **kwargs)
def call():
    """Query the deployment and return the first two ``|``-separated fields.

    Uses a Serve handle when ``use_handle`` is truthy and an HTTP GET
    otherwise.
    """
    if use_handle:
        handle = serve.get_deployment(name).get_handle()
        ret = ray.get(handle.handler.remote())
    else:
        response = requests.get(f"http://localhost:8000/{name}")
        ret = response.text

    fields = ret.split("|")
    return fields[0], fields[1]
def test_import_path_deployment_decorated(self, serve_instance):
    """Deploy two already-decorated targets by import path and check them.

    After deployment, ``max_concurrent_queries`` of both deployments must
    differ from 17.
    """
    func_path = "ray.serve.tests.test_application.decorated_func"
    clss_path = "ray.serve.tests.test_application.DecoratedClass"

    func = serve.deployment(name="decorated_func")(func_path)
    clss = serve.deployment(name="decorated_clss")(clss_path)

    self.deploy_and_check_responses(
        [func, clss],
        ["got decorated func", "got decorated class"],
    )

    # Check that non-default decorated values were overwritten
    for deployed_name in ("decorated_func", "decorated_clss"):
        assert serve.get_deployment(deployed_name).max_concurrent_queries != 17
def test_basic_get(self, serve_instance):
    """Fetching a deployment by name reaches the same running replica.

    The lookup must raise ``KeyError`` before the deployment exists.
    """

    @serve.deployment(version="1")
    def d(*args):
        return "1", os.getpid()

    # Nothing has been deployed yet.
    with pytest.raises(KeyError):
        serve.get_deployment("d")

    d.deploy()
    original_val, original_pid = ray.get(d.get_handle().remote())
    assert original_val == "1"

    del d

    fetched = serve.get_deployment("d")
    fetched_val, fetched_pid = ray.get(fetched.get_handle().remote())
    assert fetched_val == "1"
    assert fetched_pid == original_pid
def call(block=False):
    """Query the deployment and return the first two ``|``-separated fields.

    ``block`` is forwarded as the handle argument, or as the ``block`` query
    parameter when the HTTP path is used.
    """
    if use_handle:
        handle = serve.get_deployment(name).get_handle()
        ret = ray.get(handle.handler.remote(block))
    else:
        url = f"http://localhost:8000/{name}"
        ret = requests.get(url, params={"block": block}).text

    fields = ret.split("|")
    return fields[0], fields[1]
def test_basic_get(serve_instance):
    """A deployment started via ``serve.run`` can be fetched by name.

    The by-name handle must return the same value from the same process as
    the handle returned by ``serve.run``.
    """
    name = "test"

    @serve.deployment(name=name, version="1")
    def d(*args):
        return "1", os.getpid()

    # Nothing has been deployed yet.
    with pytest.raises(KeyError):
        serve.get_deployment(name=name)

    run_handle = serve.run(d.bind())
    original_val, original_pid = ray.get(run_handle.remote())
    assert original_val == "1"

    del d

    fetched = serve.get_deployment(name=name)
    fetched_val, fetched_pid = ray.get(fetched.get_handle().remote())
    assert fetched_val == "1"
    assert fetched_pid == original_pid
def test_controller_starts_java_replica(shutdown_only):  # noqa: F811
    """Deploy a Java echo deployment through the controller and call it.

    The replica is called twice: once directly through its actor handle and
    once through a Serve handle. The deployment is deleted at the end.
    """
    ray.init(
        num_cpus=8,
        namespace="default_test_namespace",
        # A dummy code search path to enable cross language.
        job_config=JobConfig(code_search_path=["."]),
    )
    client = serve.start(detached=True)
    controller = client._controller

    # Describe a cross-language (Java) deployment.
    config = DeploymentConfig()
    config.deployment_language = JAVA
    config.is_cross_language = True

    replica_config = ReplicaConfig.create(
        "io.ray.serve.util.ExampleEchoDeployment",
        init_args=["my_prefix "],
    )

    # Deploy it
    deployment_name = "my_java"
    deploy_ref = controller.deploy.remote(
        name=deployment_name,
        deployment_config_proto_bytes=config.to_proto_bytes(),
        replica_config_proto_bytes=replica_config.to_proto_bytes(),
        route_prefix=None,
        deployer_job_id=ray.get_runtime_context().job_id,
    )
    updating = ray.get(deploy_ref)
    assert updating
    client._wait_for_deployment_healthy(deployment_name)

    # Let's try to call it!
    all_handles = ray.get(controller._all_running_replicas.remote())
    backend_handle = all_handles["my_java"][0].actor_handle
    metadata_bytes = RequestMetadata(
        request_id="id-1",
        endpoint="endpoint",
        call_method="call",
    ).SerializeToString()
    request_bytes = RequestWrapper(
        body=msgpack_serialize("hello")
    ).SerializeToString()
    out = backend_handle.handleRequest.remote(metadata_bytes, request_bytes)
    assert ray.get(out) == "my_prefix hello"

    # Same call again, this time through a Serve handle.
    handle = serve.get_deployment("my_java").get_handle()
    handle_out = handle.remote("hello handle")
    assert ray.get(handle_out) == "my_prefix hello handle"

    ray.get(controller.delete_deployment.remote(deployment_name))
    client._wait_for_deployment_deleted(deployment_name)
def test_repeated_get_handle_cached(serve_instance):
    """Repeated ``get_handle`` calls must collapse to one distinct handle."""

    @serve.deployment
    def f(_):
        return ""

    f.deploy()

    # Handles obtained from the local deployment object.
    local_handles = set()
    for _ in range(100):
        local_handles.add(f.get_handle())
    assert len(local_handles) == 1

    # Handles obtained after looking the deployment up by name.
    fetched_handles = set()
    for _ in range(100):
        fetched_handles.add(serve.get_deployment("f").get_handle())
    assert len(fetched_handles) == 1
def _validate_consistent_python_output(
    deployment, dag, handle_by_name, input=None, output=None
):
    """Assert that three execution paths give ``output`` for ``input``.

    The paths checked, in order:
        1) The handle from the Deployment instance's ``get_handle()``
        2) Executing the original Ray DAG
        3) The handle of the deployment fetched through
           ``serve.get_deployment()`` using the name in ``handle_by_name``
    """
    instance_handle = deployment.get_handle()
    instance_result = ray.get(instance_handle.remote(input))
    assert instance_result == output

    dag_result = ray.get(dag.execute(input))
    assert dag_result == output

    fetched_handle = serve.get_deployment(handle_by_name).get_handle()
    fetched_result = ray.get(fetched_handle.remote(input))
    assert fetched_result == output
def test_deploy_empty_version(self, serve_instance):
    """Redeploying a deployment that has no version yields a new process."""

    @serve.deployment
    def d(*args):
        return "1", os.getpid()

    d.deploy()
    original_val, original_pid = ray.get(d.get_handle().remote())
    assert original_val == "1"

    del d

    # Fetch by name and redeploy unchanged.
    fetched = serve.get_deployment("d")
    fetched.deploy()
    redeployed_val, redeployed_pid = ray.get(fetched.get_handle().remote())
    assert redeployed_val == "1"
    assert redeployed_pid != original_pid
def test_new_driver(serve_instance):
    """A deployment created by a separate driver script is callable here."""
    script_template = """
import ray
ray.init(address="{}")

from ray import serve

@serve.deployment
def driver(starlette_request):
    return "OK!"

driver.deploy()
"""
    script = script_template.format(ray.worker._global_node._redis_address)
    ray.test_utils.run_string_as_driver(script)

    driver_handle = serve.get_deployment("driver").get_handle()
    reply = ray.get(driver_handle.remote())
    assert reply == "OK!"
def test_new_driver(serve_instance):
    """A deployment created by a separate driver script is callable here."""
    script_template = """
import ray
ray.init(address="{}", namespace="default_test_namespace")

from ray import serve

@serve.deployment
def driver():
    return "OK!"

driver.deploy()
"""
    script = script_template.format(ray.worker._global_node.address)
    run_string_as_driver(script)

    driver_handle = serve.get_deployment("driver").get_handle()
    reply = ray.get(driver_handle.remote())
    assert reply == "OK!"
def test_scale_replicas(self, serve_instance):
    """Redeploying with ``num_replicas=2`` must yield two distinct PIDs."""

    @serve.deployment
    def d(*args):
        return os.getpid()

    def check_num_replicas(num):
        # 50 requests are sent; the number of distinct PIDs must equal `num`.
        handle = serve.get_deployment("d").get_handle()
        refs = [handle.remote() for _ in range(50)]
        distinct_pids = set(ray.get(refs))
        assert len(distinct_pids) == num

    d.deploy()
    check_num_replicas(1)

    del d

    fetched = serve.get_deployment("d")
    scaled = fetched.options(num_replicas=2)
    scaled.deploy()
    check_num_replicas(2)
def test_ray_client(ray_client_instance):
    """Start Serve, deploy, and delete via separate Ray Client drivers.

    Each step runs as its own driver script connected to
    ``ray_client_instance``; this process checks the backend/endpoint
    listings and the HTTP route between steps.
    """
    ray.util.connect(ray_client_instance)

    start_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start_script)

    serve.connect()

    deploy_script = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy_script)

    # The deployment must be listed and reachable over HTTP.
    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    hello_response = requests.get("http://localhost:8000/hello")
    assert hello_response.text == "hello"

    delete_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete_script)

    # After deletion the deployment must be gone from both listings.
    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
def test_deploy_new_version(serve_instance):
    """Redeploying under a new version string yields a new process."""
    name = "test"

    @serve.deployment(name=name, version="1")
    def d(*args):
        return "1", os.getpid()

    d.deploy()
    original_val, original_pid = ray.get(d.get_handle().remote())
    assert original_val == "1"

    del d

    # Fetch by name and redeploy as version "2".
    fetched = serve.get_deployment(name=name)
    upgraded = fetched.options(version="2")
    upgraded.deploy()
    new_val, new_pid = ray.get(fetched.get_handle().remote())
    assert new_val == "1"
    assert new_pid != original_pid
def test_single_class_with_valid_ray_options(serve_instance):
    """Ray actor options set on the DAG node appear on the deployment."""
    model = Model.options(num_cpus=1, memory=1000)._bind(2, ratio=0.3)
    ray_dag = model.forward._bind(InputNode())

    serve_root_dag = ray_dag._apply_recursive(transform_ray_dag_to_serve_dag)
    deployments = extract_deployments_from_serve_dag(serve_root_dag)
    assert len(deployments) == 1

    only_deployment = deployments[0]
    only_deployment.deploy()
    _validate_consistent_output(
        only_deployment, ray_dag, only_deployment.name, input=1, output=0.6
    )

    # Check the options on the deployment as fetched by name.
    deployment = serve.get_deployment(only_deployment.name)
    actor_options = deployment.ray_actor_options
    assert actor_options.get("num_cpus") == 1
    assert actor_options.get("memory") == 1000
    assert actor_options.get("runtime_env") == {}
def test_single_class_with_valid_ray_options(serve_instance):
    """Ray actor options set on the DAG node appear on the deployment."""
    with InputNode() as dag_input:
        model = Model.options(num_cpus=1, memory=1000).bind(2, ratio=0.3)
        ray_dag = model.forward.bind(dag_input)

    with _DAGNodeNameGenerator() as node_name_generator:

        def to_serve_node(node):
            return transform_ray_dag_to_serve_dag(node, node_name_generator)

        serve_root_dag = ray_dag.apply_recursive(to_serve_node)

    deployments = extract_deployments_from_serve_dag(serve_root_dag)
    assert len(deployments) == 1

    only_deployment = deployments[0]
    only_deployment.deploy()
    _validate_consistent_python_output(
        only_deployment, ray_dag, only_deployment.name, input=1, output=0.6
    )

    # Check the options on the deployment as fetched by name.
    deployment = serve.get_deployment(only_deployment.name)
    actor_options = deployment.ray_actor_options
    assert actor_options.get("num_cpus") == 1
    assert actor_options.get("memory") == 1000
    assert actor_options.get("runtime_env") == {}
def test_handle_cache_out_of_scope(serve_instance):
    """Repeated by-name handle lookups must not grow the handle cache."""
    # https://github.com/ray-project/ray/issues/18980
    initial_num_cached = len(get_global_client().handle_cache)
    expected_num_cached = initial_num_cached + 1

    @serve.deployment(name="f")
    def f():
        return "hi"

    f.deploy()
    handle = serve.get_deployment("f").get_handle()

    handle_cache = get_global_client().handle_cache
    assert len(handle_cache) == expected_num_cached

    def sender_where_handle_goes_out_of_scope():
        # The handle fetched here is dropped when the function returns.
        scoped_handle = serve.get_deployment("f").get_handle()
        assert scoped_handle is handle
        assert ray.get(scoped_handle.remote()) == "hi"

    for _ in range(30):
        sender_where_handle_goes_out_of_scope()

    assert len(handle_cache) == expected_num_cached
def get_deployment(self, name, use_list_api):
    """Return the deployment called ``name`` through one of two Serve APIs.

    Uses ``serve.list_deployments()[name]`` when ``use_list_api`` is truthy
    and ``serve.get_deployment(name)`` otherwise.
    """
    if not use_list_api:
        return serve.get_deployment(name)
    return serve.list_deployments()[name]
def __init__(self):
    """Store a handle (``sync=False``) to the ``deployment_name`` deployment."""
    target = serve.get_deployment(deployment_name)
    self.handle = target.get_handle(sync=False)
def test_deploy(ray_start_stop):
    """Deploy valid config files with ``serve deploy`` and check the result.

    Each config is deployed twice to check idempotence. A final config is
    then deployed after which the ``shallow`` route must answer 404 over
    HTTP while the deployment still answers through a handle.
    """
    # Initialize serve in test to enable calling serve.list_deployments()
    ray.init(address="auto", namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE)
    serve.start(detached=True)

    # Create absolute file names to YAML config files
    config_dir = os.path.join(os.path.dirname(__file__), "test_config_files")
    three_deployments = os.path.join(config_dir, "three_deployments.yaml")
    two_deployments = os.path.join(config_dir, "two_deployments.yaml")
    deny_deployment = os.path.join(config_dir, "deny_access.yaml")

    # Dictionary mapping test config file names to expected deployment names
    # and configurations. These should match the values specified in the YAML
    # files.
    configs = {
        three_deployments: {
            "shallow": {
                "num_replicas": 1,
                "response": "Hello shallow world!",
            },
            "deep": {
                "num_replicas": 1,
                "response": "Hello deep world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
        two_deployments: {
            "shallow": {
                "num_replicas": 3,
                "response": "Hello shallow world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
    }

    request_url = "http://localhost:8000/"
    success_message_fragment = b"Sent deploy request successfully!"

    # Check idempotence:
    for _ in range(2):
        for config_file_name, expected_deployments in configs.items():
            deploy_response = subprocess.check_output(
                ["serve", "deploy", config_file_name]
            )
            assert success_message_fragment in deploy_response

            # Wait until every expected deployment gives its response.
            for name, deployment_config in expected_deployments.items():

                def responds_as_expected():
                    body = requests.get(f"{request_url}{name}").text
                    return body == deployment_config["response"]

                wait_for_condition(responds_as_expected, timeout=15)

            running_deployments = serve.list_deployments()

            # Check that running deployment names match expected deployment names
            running_names = set(running_deployments.keys())
            assert running_names == expected_deployments.keys()

            for name, deployment in running_deployments.items():
                expected_replicas = expected_deployments[name]["num_replicas"]
                assert deployment.num_replicas == expected_replicas

    # Deploy a deployment without HTTP access
    deploy_response = subprocess.check_output(["serve", "deploy", deny_deployment])
    assert success_message_fragment in deploy_response

    def shallow_route_is_gone():
        return requests.get(f"{request_url}shallow").status_code == 404

    wait_for_condition(shallow_route_is_gone, timeout=15)

    # The deployment must still answer through a handle.
    shallow_handle = serve.get_deployment("shallow").get_handle()
    assert ray.get(shallow_handle.remote()) == "Hello shallow world!"

    ray.shutdown()
async def __call__(self, req):
    """Forward ``req`` to the ``deployment_name`` deployment; return its result.

    The handle (``sync=False``) is created on the first call and reused
    afterwards.
    """
    if self.handle is None:
        target = serve.get_deployment(deployment_name)
        self.handle = target.get_handle(sync=False)

    # The first await yields an object ref; the second yields the result.
    pending_ref = await self.handle.remote(req)
    result = await pending_ref
    return result
import os

import ray
from ray import serve

# Connect to the running cluster in the "serve" namespace.
ray.init(address="auto", namespace="serve")

# Replica count comes from the NUM_REPLICAS environment variable (default 1).
num_replicas = os.environ.get("NUM_REPLICAS", "1")

rl_deployment = serve.get_deployment("rl")
scaled_deployment = rl_deployment.options(num_replicas=int(num_replicas))
scaled_deployment.deploy()
# See code below for method 1.
Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl")
Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl")

# Get the current list of deployments
print(serve.list_deployments())

print("ServerHandle API responses: " + "--" * 5)

# Method 1) Access each deployment using the ServerHandle API
for _ in range(2):
    for d_name in ["rep-1", "rep-2"]:
        # Get handle to the each deployment and invoke its method.
        # Which replica the request is dispatched to is determined
        # by the Router actor.
        handle = serve.get_deployment(d_name).get_handle()
        print(f"handle name : {d_name}")
        prediction = ray.get(handle.remote(random()))
        print(f"prediction : {prediction}")
        print("-" * 2)

print("HTTP responses: " + "--" * 5)

# Method 2) Access deployment via HTTP Request
for _ in range(2):
    for d_name in ["rep-1", "rep-2"]:
        # Send HTTP request along with data payload
        url = f"http://127.0.0.1:8000/{d_name}"
        print(f"handle name : {d_name}")
        http_prediction = requests.get(url, params={"data": random()}).text
        print(f"prediction : {http_prediction}")
def check_num_replicas(num):
    """Assert that 50 requests return exactly ``num`` distinct results."""
    handle = serve.get_deployment(name=name).get_handle()
    refs = [handle.remote() for _ in range(50)]
    distinct_results = set(ray.get(refs))
    assert len(distinct_results) == num