def run(
    config_or_import_path: str,
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    app_dir: str,
    address: str,
    host: str,
    port: int,
    blocking: bool,
):
    """Deploy a Serve target given either a config file path or an import path.

    If ``config_or_import_path`` names an existing file it is loaded with
    ``Application.from_yaml``; otherwise it is resolved with ``import_attr``.
    Ray is then initialised in the "serve" namespace and the target is run.

    With ``blocking`` set, the call sleeps in a loop until interrupted. A
    ``KeyboardInterrupt`` shuts Serve down and exits the process.
    """
    # Make modules located in app_dir importable.
    sys.path.insert(0, app_dir)

    final_runtime_env = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    is_config_file = pathlib.Path(config_or_import_path).is_file()
    if not is_config_file:
        import_path = config_or_import_path
        cli_logger.print(f"Deploying from import path: '{import_path}'.")
        target = import_attr(import_path)
    else:
        config_path = config_or_import_path
        cli_logger.print(f"Deploying from config file: '{config_path}'.")
        with open(config_path, "r") as config_file:
            target = Application.from_yaml(config_file)

    # Setting the runtime_env here will set defaults for the deployments.
    ray.init(address=address, namespace="serve", runtime_env=final_runtime_env)

    try:
        serve.run(target, host=host, port=port)
        cli_logger.success("Deployed successfully.")

        # Block, letting Ray print logs to the terminal.
        while blocking:
            time.sleep(10)
    except KeyboardInterrupt:
        cli_logger.info("Got KeyboardInterrupt, shutting down...")
        serve.shutdown()
        sys.exit()
def test_dedicated_cpu(controller_cpu, num_proxy_cpus, ray_cluster):
    """Check the CPUs left available after starting Serve on an 8-CPU node.

    The expected remainder is the cluster total minus ``int(controller_cpu)``
    and ``num_proxy_cpus``.
    """
    total_cpus = 8
    head = ray_cluster.add_node(num_cpus=total_cpus)

    ray.init(head.address)
    wait_for_condition(lambda: ray.cluster_resources().get("CPU") == total_cpus)

    expected_free = total_cpus - (int(controller_cpu) + num_proxy_cpus)

    serve.start(
        dedicated_cpu=controller_cpu,
        http_options=HTTPOptions(num_cpus=num_proxy_cpus),
    )

    def only_expected_cpus_remain():
        return ray.available_resources().get("CPU") == expected_free

    wait_for_condition(only_expected_cpus_remain)

    serve.shutdown()
    ray.shutdown()
def test_no_http(ray_shutdown):
    """Start Serve with several "no HTTP" option spellings and check each one.

    For every option set, exactly one actor must be alive, the controller must
    report no HTTP proxies, and a deployment handle must still work.
    """
    # The following should have the same effect.
    options = [
        {"http_host": None},
        {"http_options": {"host": None}},
        {"http_options": {"location": None}},
        {"http_options": {"location": "NoServer"}},
    ]

    ray.init(num_cpus=16)

    for i, option in enumerate(options):
        print(f"[{i+1}/{len(options)}] Running with {option}")
        serve.start(**option)

        # Only controller actor should exist
        alive_count = sum(
            1
            for actor in ray.state.actors().values()
            if actor["State"] == ray.gcs_utils.ActorTableData.ALIVE
        )
        assert alive_count == 1

        controller = serve.api._global_client._controller
        proxies = ray.get(controller.get_http_proxies.remote())
        assert len(proxies) == 0

        # Test that the handle still works.
        @serve.deployment
        def hello(*args):
            return "hello"

        hello.deploy()
        handle = hello.get_handle()
        assert ray.get(handle.remote()) == "hello"

        serve.shutdown()
def test_serve_application_to_schema_to_serve_application():
    """Round-trip two deployments through the application schema and deploy them.

    Checks that replica counts and route prefixes survive the round trip, that
    both deployments answer via handle and HTTP, and that the reported
    statuses cover exactly ``f1`` and ``f2``.
    """

    @serve.deployment(num_replicas=1, route_prefix="/hello")
    def f1():
        # Placeholder body; the import path assigned to _func_or_class below
        # is what the round-tripped deployment refers to.
        pass

    @serve.deployment(num_replicas=2, route_prefix="/hi")
    def f2():
        pass

    for placeholder in (f1, f2):
        placeholder._func_or_class = "ray.serve.tests.test_schema.global_f"

    app_schema = serve_application_to_schema([f1, f2])
    deployments = schema_to_serve_application(app_schema)

    assert deployments[0].num_replicas == 1
    assert deployments[0].route_prefix == "/hello"
    assert deployments[1].num_replicas == 2
    assert deployments[1].route_prefix == "/hi"

    serve.start()

    first = deployments[0]
    second = deployments[1]
    first.deploy()
    second.deploy()

    assert ray.get(first.get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"
    assert ray.get(second.get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hi").text == "Hello world!"

    # Check statuses
    statuses = serve_application_status_to_schema(get_deployment_statuses()).statuses
    unseen_names = {"f1", "f2"}
    for entry in statuses:
        assert entry.status in {"UPDATING", "HEALTHY"}
        assert entry.name in unseen_names
        unseen_names.discard(entry.name)
    assert not unseen_names

    serve.shutdown()
def test_shutdown(ray_shutdown):
    """Check that the controller and proxy actors go away on serve.shutdown().

    Also checks that ``serve.list_deployments()`` raises
    ``RayServeException`` once Serve has been shut down.
    """
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003))

    @serve.deployment
    def f():
        pass

    serve.run(f.bind())

    serve_controller_name = serve.context._global_client._controller_name
    proxy_name = format_actor_name(
        SERVE_PROXY_NAME,
        serve.context._global_client._controller_name,
        get_all_node_ids()[0][0],
    )
    actor_names = [serve_controller_name, proxy_name]

    def actor_exists(name):
        # ray.get_actor raises ValueError when no such named actor is found.
        try:
            ray.get_actor(name, namespace=SERVE_NAMESPACE)
        except ValueError:
            return False
        return True

    def check_alive():
        # Build the full list first so every actor is looked up.
        return all([actor_exists(name) for name in actor_names])

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_deployments()

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(actor_exists(name) for name in actor_names)

    wait_for_condition(check_dead)
def test_deployment_to_schema_to_deployment():
    """Round-trip one deployment through its schema and deploy the result.

    Checks replica count, route prefix and the runtime_env entries after the
    round trip, then that the deployment answers via handle and HTTP.
    """
    working_dir_uri = (
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip"
    )
    py_module_uri = (
        "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip"
    )

    @serve.deployment(
        num_replicas=3,
        route_prefix="/hello",
        ray_actor_options={
            "runtime_env": {
                "working_dir": working_dir_uri,
                "py_modules": [py_module_uri],
            }
        },
    )
    def f():
        # The body of this function doesn't matter. It gets replaced by
        # global_f() when the import path in f._func_or_class is overwritten.
        # This function is used as a convenience to apply the @serve.deployment
        # decorator without converting global_f() into a Deployment object.
        pass

    f._func_or_class = "ray.serve.tests.test_schema.global_f"

    deployment = schema_to_deployment(deployment_to_schema(f))

    assert deployment.num_replicas == 3
    assert deployment.route_prefix == "/hello"

    assert deployment.ray_actor_options["runtime_env"]["working_dir"] == (
        working_dir_uri
    )
    assert deployment.ray_actor_options["runtime_env"]["py_modules"] == [
        py_module_uri,
        working_dir_uri,
    ]

    serve.start()
    deployment.deploy()

    handle = deployment.get_handle()
    assert ray.get(handle.remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"

    serve.shutdown()
def test_serve_namespace(shutdown_ray, detached, ray_namespace):
    """Test that Serve starts in SERVE_NAMESPACE regardless of driver namespace."""
    with ray.init(namespace=ray_namespace):

        @serve.deployment
        def f(*args):
            return "got f"

        serve.run(f.bind())

        named_actors = ray.util.list_named_actors(all_namespaces=True)
        assert len(named_actors) == 3

        # Every named actor must report SERVE_NAMESPACE as its namespace.
        for entry in named_actors:
            if not entry["namespace"] == SERVE_NAMESPACE:
                raise AssertionError

        response = requests.get("http://localhost:8000/f")
        assert response.text == "got f"

        serve.shutdown()
def test_get_serve_status(shutdown_ray):
    """Deploy one function and check what client.get_serve_status() reports.

    The app status must be "RUNNING" and the first deployment status must be
    named "f" with a status of "UPDATING" or "HEALTHY".
    """
    ray.init()
    client = serve.start()

    @serve.deployment
    def f(*args):
        return "Hello world"

    f.deploy()

    status_info_1 = client.get_serve_status()
    assert status_info_1.app_status.status == "RUNNING"

    acceptable = {"UPDATING", "HEALTHY"}
    assert status_info_1.deployment_statuses[0].name == "f"
    assert status_info_1.deployment_statuses[0].status in acceptable

    serve.shutdown()
    ray.shutdown()
def test_standalone_actor_outside_serve():
    """Check a named non-Serve actor still responds after serve.shutdown().

    The actor is created in the "serve" namespace before Serve starts.
    """
    # https://github.com/ray-project/ray/issues/20066

    ray.init(num_cpus=8, namespace="serve")

    @ray.remote
    class MyActor:
        def ready(self):
            return

    a = MyActor.options(name="my_actor").remote()
    # The actor must respond before Serve is started...
    ray.get(a.ready.remote())

    serve.start()
    serve.shutdown()

    # ...and again after Serve has been shut down.
    ray.get(a.ready.remote())
    ray.shutdown()
def test_controller_deserialization_args_and_kwargs():
    """Ensures init_args and init_kwargs stay serialized in controller."""
    ray.init()
    client = serve.start()

    class PidBasedString(str):
        pass

    def generate_pid_based_deserializer(pid, raw_deserializer):
        """Cannot be deserialized by the process with specified pid."""

        def deserializer(*args):
            import os

            # Refuse to run inside the process whose pid was captured.
            if os.getpid() == pid:
                raise RuntimeError("Cannot be deserialized by this process!")
            return raw_deserializer(*args)

        return deserializer

    # Patch __reduce__ with a wrapper that raises inside the controller process.
    controller_pid = ray.get(client._controller.get_pid.remote())
    PidBasedString.__reduce__ = generate_pid_based_deserializer(
        controller_pid, PidBasedString.__reduce__
    )

    @serve.deployment
    class Echo:
        def __init__(self, arg_str, kwarg_str="failed"):
            self.arg_str = arg_str
            self.kwarg_str = kwarg_str

        def __call__(self, request):
            return self.arg_str + self.kwarg_str

    bound_app = Echo.bind(
        PidBasedString("hello "), kwarg_str=PidBasedString("world!")
    )
    serve.run(bound_app)

    assert requests.get("http://localhost:8000/Echo").text == "hello world!"

    serve.shutdown()
    ray.shutdown()
def test_run_graph_task_uses_zero_cpus():
    """Check that the run_graph() task uses zero CPUs."""
    ray.init(num_cpus=2)
    client = serve.start(detached=True)

    app_config = ServeApplicationSchema.parse_obj(
        {"import_path": "ray.serve.tests.test_standalone.WaiterNode"}
    )
    client.deploy_app(app_config)

    def cpu_is_consumed():
        return ray.available_resources()["CPU"] < 1.9

    # Available CPUs must not drop below 1.9 within the timeout, so the wait
    # is expected to fail.
    with pytest.raises(RuntimeError):
        wait_for_condition(cpu_is_consumed, timeout=5)

    def waiter_responds():
        reply = requests.get("http://localhost:8000/Waiter")
        return reply.text == "May I take your order?"

    wait_for_condition(waiter_responds)

    serve.shutdown()
    ray.shutdown()
def test_update_num_replicas_anonymous_namespace(shutdown_ray, detached):
    """Test updating num_replicas with anonymous namespace."""
    ray.init()
    serve.start(detached=detached)

    @serve.deployment(num_replicas=1)
    def f(*args):
        return "got f"

    def count_named_actors():
        return len(ray.util.list_named_actors(all_namespaces=True))

    f.deploy()
    num_actors = count_named_actors()

    # Redeploying must leave the number of named actors unchanged.
    for _ in range(5):
        f.deploy()
        assert num_actors == count_named_actors()

    serve.shutdown()
def test_controller_recover_and_deploy(self, client: ServeControllerClient):
    """Ensure that in-progress deploy can finish even after controller dies."""
    url = "http://localhost:8000/"

    def wait_until_app_answers():
        # Both operations must return their expected order text.
        wait_for_condition(
            lambda: requests.post(url, json=["ADD", 2]).json() == "4 pizzas please!"
        )
        wait_for_condition(
            lambda: requests.post(url, json=["MUL", 3]).json() == "9 pizzas please!"
        )

    app_config = ServeApplicationSchema.parse_obj(self.get_test_config())
    client.deploy_app(app_config)

    # Wait for app to deploy
    wait_until_app_answers()
    deployment_timestamp = client.get_serve_status().app_status.deployment_timestamp

    # Delete all deployments, but don't update config
    doomed = ["Router", "Multiplier", "Adder", "create_order", "DAGDriver"]
    client.delete_deployments(doomed)

    ray.kill(client._controller, no_restart=False)

    # When controller restarts, it should redeploy config automatically
    wait_until_app_answers()
    timestamp_after_restart = (
        client.get_serve_status().app_status.deployment_timestamp
    )
    assert deployment_timestamp == timestamp_after_restart

    serve.shutdown()
    client = serve.start(detached=True)

    # Ensure config checkpoint has been deleted
    assert client.get_serve_status().app_status.deployment_timestamp == 0
def test_deploy_with_overriden_namespace(shutdown_ray, detached):
    """Test deployments with an overridden controller namespace."""
    ray.init(namespace="ray_namespace")
    serve.start(
        detached=detached,
        _override_controller_namespace="controller_namespace",
    )

    # Deploy twice so the second pass redeploys "f" with a new return value.
    for iteration in range(2):

        @serve.deployment
        def f(*args):
            return f"{iteration}"

        f.deploy()
        reply = requests.get("http://localhost:8000/f")
        assert reply.text == f"{iteration}"

    serve.shutdown()
def test_shutdown(ray_shutdown):
    """Check that the controller and proxy actors go away on serve.shutdown().

    Also checks that ``serve.list_backends()`` raises ``RayServeException``
    once Serve has been shut down.
    """
    ray.init(num_cpus=16)
    serve.start(http_port=8003)

    @serve.deployment
    def f():
        pass

    f.deploy()

    controller_name = serve.api._global_client._controller_name
    proxy_name = format_actor_name(
        SERVE_PROXY_NAME,
        serve.api._global_client._controller_name,
        get_all_node_ids()[0][0],
    )
    actor_names = [controller_name, proxy_name]

    def actor_exists(name):
        # ray.get_actor raises ValueError when no such named actor is found.
        try:
            ray.get_actor(name)
        except ValueError:
            return False
        return True

    def check_alive():
        # Build the full list first so every actor is looked up.
        return all([actor_exists(name) for name in actor_names])

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_backends()

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(actor_exists(name) for name in actor_names)

    wait_for_condition(check_dead)
def test_serve_shutdown(ray_shutdown):
    """Check that shutting Serve down removes deployments and allows a restart.

    After a shutdown and a fresh detached start there must be no deployments,
    and deploying again must bring the count back to one.
    """
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    def deployment_count():
        return len(serve.list_deployments())

    A.deploy()
    assert deployment_count() == 1

    serve.shutdown()
    serve.start(detached=True)

    assert deployment_count() == 0

    A.deploy()
    assert deployment_count() == 1
def test_serve_namespace(ray_start_stop):
    """
    Check that the Dashboard's Serve can interact with the Python API
    when they both start in the "serve namespace"
    """
    one = {
        "name": "one",
        "num_replicas": 1,
        "route_prefix": "/one",
        "ray_actor_options": {"runtime_env": {"py_modules": [test_module_uri]}},
        "import_path": "test_module.test.one",
    }

    payload = {"deployments": [one]}
    put_response = requests.put(GET_OR_PUT_URL, json=payload, timeout=30)
    assert put_response.status_code == 200

    ray.init(address="auto", namespace="serve")
    serve.start()

    # The deployment created through the PUT request must be visible here.
    deployments = serve.list_deployments()
    assert len(deployments) == 1
    assert "one" in deployments

    serve.shutdown()
def test_http_root_url(ray_shutdown):
    """Check that a deployment's URL follows SERVE_ROOT_URL_ENV_KEY.

    With the environment variable set, ``f.url`` must be the root URL plus
    "/f". With it removed, ``f.url`` must be the default
    ``http://127.0.0.1:<port>/f``.
    """

    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"

    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    try:
        serve.start(http_options=dict(port=port))
        f.deploy()
        assert f.url == root_url + "/f"
        serve.shutdown()
    finally:
        # Always clear the variable, even when an assertion above fails, so
        # it cannot leak into the second phase or into later tests.
        os.environ.pop(SERVE_ROOT_URL_ENV_KEY, None)

    port = new_port()
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
def test_serve_shutdown(ray_shutdown):
    """Check that shutting Serve down removes deployments and allows a restart.

    Ray is initialised in the "serve" namespace. After a shutdown and a fresh
    detached start there must be no deployments, and running the app again
    must bring the count back to one.
    """
    ray.init(namespace="serve")
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    def deployment_count():
        return len(serve.list_deployments())

    serve.run(A.bind())
    assert deployment_count() == 1

    serve.shutdown()
    serve.start(detached=True)

    assert deployment_count() == 0

    serve.run(A.bind())
    assert deployment_count() == 1
def test_fixed_number_proxies(ray_cluster):
    """Check the "FixedNumber" HTTP location on a three-node cluster.

    Starting without ``fixed_number_replicas`` must fail validation. With
    ``fixed_number_replicas=2`` the controller must report two HTTP proxies.
    """
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    for _ in range(2):
        cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    assert len(ray._private.state.node_ids()) == 3

    incomplete_options = {"location": "FixedNumber"}
    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(http_options=incomplete_options)

    complete_options = {
        "port": new_port(),
        "location": "FixedNumber",
        "fixed_number_replicas": 2,
    }
    serve.start(http_options=complete_options)

    # Only the controller and two http proxy should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    raw_names = ray.get(controller_handle.get_http_proxy_names.remote())
    assert len(ActorNameList.FromString(raw_names).names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
def test_http_root_url(ray_shutdown):
    """Check that a deployment's URL follows SERVE_ROOT_URL_ENV_KEY.

    Three phases are exercised:
      1. The variable is set in ``os.environ``: ``f.url`` is the root URL
         plus "/f".
      2. The variable is removed: ``f.url`` is the default
         ``http://127.0.0.1:<port>/f``.
      3. The variable is supplied through the ``runtime_env`` passed to
         ``ray.init``: ``f.url`` is again the root URL plus "/f".
    """

    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"

    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    try:
        serve.start(http_options=dict(port=port))
        serve.run(f.bind())
        assert f.url == root_url + "/f"
        serve.shutdown()
        ray.shutdown()
    finally:
        # Always clear the variable, even when an assertion above fails, so
        # it cannot leak into the later phases or into later tests.
        os.environ.pop(SERVE_ROOT_URL_ENV_KEY, None)

    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
    ray.shutdown()

    ray.init(runtime_env={"env_vars": {SERVE_ROOT_URL_ENV_KEY: root_url}})
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()
def test_controller_recover_and_delete():
    """Ensure that in-progress deletion can finish even after controller dies."""
    ray.init()
    client = serve.start()

    @serve.deployment(
        num_replicas=50,
        ray_actor_options={"num_cpus": 0.001},
    )
    def f():
        pass

    def named_actor_count():
        return len(ray.util.list_named_actors(all_namespaces=True))

    f.deploy()

    actors = ray.util.list_named_actors(all_namespaces=True)
    client.delete_deployments(["f"], blocking=False)

    # Wait until the non-blocking delete has removed at least one actor.
    wait_for_condition(lambda: named_actor_count() < len(actors))

    ray.kill(client._controller, no_restart=False)

    # There should still be replicas remaining
    assert named_actor_count() > 2

    # All replicas should be removed once the controller revives
    wait_for_condition(lambda: named_actor_count() == len(actors) - 50)

    # The deployment should be deleted, meaning its state should not be stored
    # in the DeploymentStateManager. This can be checked by attempting to
    # retrieve the deployment's status through the controller.
    status_of_f = client.get_serve_status().get_deployment_status("f")
    assert status_of_f is None

    serve.shutdown()
    ray.shutdown()
def test_idempotence_after_controller_death(ray_start_stop, use_command: bool):
    """Check that CLI is idempotent even if controller dies."""
    config_file_name = os.path.join(
        os.path.dirname(__file__), "test_config_files", "basic_graph.yaml"
    )
    success_message_fragment = b"Sent deploy request successfully!"

    def deploy_via_cli():
        output = subprocess.check_output(["serve", "deploy", config_file_name])
        assert success_message_fragment in output

    def wait_for_four_named_actors():
        wait_for_condition(
            lambda: len(ray.util.list_named_actors(all_namespaces=True)) == 4,
            timeout=15,
        )

    deploy_via_cli()

    ray.init(address="auto", namespace=SERVE_NAMESPACE)
    serve.start(detached=True)
    wait_for_four_named_actors()

    # Kill controller
    if not use_command:
        serve.shutdown()
    else:
        subprocess.check_output(["serve", "shutdown", "-y"])

    status_info = yaml.safe_load(subprocess.check_output(["serve", "status"]))
    assert len(status_info["deployment_statuses"]) == 0

    deploy_via_cli()

    # Restore testing controller
    serve.start(detached=True)
    wait_for_four_named_actors()

    serve.shutdown()
    ray.shutdown()
def test_status_schema_helpers():
    """Deploy two functions and check the statuses reported through the schema.

    Every status must be "UPDATING" or "HEALTHY", and the reported names must
    cover exactly ``f1`` and ``f2``, each once.
    """

    @serve.deployment(num_replicas=1, route_prefix="/hello")
    def f1():
        # Placeholder body; the deployment's _func_or_class is overwritten
        # with an import path below.
        pass

    @serve.deployment(num_replicas=2, route_prefix="/hi")
    def f2():
        pass

    for placeholder in (f1, f2):
        placeholder._func_or_class = "ray.serve.tests.test_schema.global_f"

    serve.start()

    f1.deploy()
    f2.deploy()

    # Check statuses
    status_schema = serve_application_status_to_schema(get_deployment_statuses())
    unseen_names = {"f1", "f2"}
    for entry in status_schema.statuses:
        assert entry.status in {"UPDATING", "HEALTHY"}
        assert entry.name in unseen_names
        unseen_names.discard(entry.name)
    assert not unseen_names

    serve.shutdown()
def test_idempotence_after_controller_death(ray_start_stop, use_command: bool):
    """Check that CLI is idempotent even if controller dies."""
    config_file_name = os.path.join(
        os.path.dirname(__file__), "test_config_files", "two_deployments.yaml"
    )
    success_message_fragment = b"Sent deploy request successfully!"

    def deploy_via_cli():
        output = subprocess.check_output(["serve", "deploy", config_file_name])
        assert success_message_fragment in output

    deploy_via_cli()

    ray.init(address="auto", namespace="serve")
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2

    # Kill controller
    if not use_command:
        serve.shutdown()
    else:
        subprocess.check_output(["serve", "shutdown"])

    # After the shutdown, the reported config must list no deployments.
    info = yaml.safe_load(subprocess.check_output(["serve", "config"]))
    assert "deployments" in info
    assert len(info["deployments"]) == 0

    deploy_via_cli()

    # Restore testing controller
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2

    serve.shutdown()
    ray.shutdown()
def ray_cluster():
    """Yield a ``Cluster`` and tear down Serve, Ray and that cluster afterwards.

    Tests in this file receive the yielded object through a ``ray_cluster``
    parameter and add nodes to it.

    The yielded object is the same instance that is shut down on teardown.
    Yielding a second, freshly constructed ``Cluster()`` would leave the
    cluster used by the test running and shut down an unused one instead.
    """
    cluster = Cluster()
    yield cluster
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
def ray_shutdown():
    """Yield nothing, then shut down Serve followed by Ray.

    Tests in this file receive it through a ``ray_shutdown`` parameter.
    """
    yield None

    # Teardown: Serve first, then Ray.
    serve.shutdown()
    ray.shutdown()
def crash():
    """Force-stop Ray via the CLI, then shut down the Ray and Serve APIs."""
    stop_command = ["ray", "stop", "--force"]
    # subprocess.call does not raise on a non-zero exit; the code is ignored.
    subprocess.call(stop_command)

    ray.shutdown()
    serve.shutdown()
def shutdown_ray():
    """Yield nothing, then shut down Serve followed by Ray.

    Tests in this file receive it through a ``shutdown_ray`` parameter.
    """
    yield None

    # Teardown: Serve first, then Ray.
    serve.shutdown()
    ray.shutdown()
def shutdown():
    """Call ``serve.api._connect()`` and then shut Serve down."""
    # presumably attaches to an already-running Serve instance so that
    # serve.shutdown() has something to act on — TODO confirm.
    serve.api._connect()

    serve.shutdown()