def test_deploy_app_overwrite_apps(self, client: ServeControllerClient):
    """Verify that deploying a second app replaces the one currently live."""

    endpoint = "http://localhost:8000/"

    def world_is_served():
        return requests.get(endpoint).text == "wonderful world"

    def pizza_is_served():
        reply = requests.post(endpoint, json=["ADD", 2]).json()
        return reply == "4 pizzas please!"

    # First graph: its driver is expected to be mounted at route_prefix "/".
    world_app = ServeApplicationSchema.parse_obj(
        {"import_path": "ray.serve.tests.test_config_files.world.DagNode"}
    )
    client.deploy_app(world_app)
    wait_for_condition(world_is_served)

    # Second graph: its driver is also expected at "/", so requests to "/"
    # must now be answered by the new driver.
    pizza_app = ServeApplicationSchema.parse_obj(
        {"import_path": "ray.serve.tests.test_config_files.pizza.serve_dag"}
    )
    client.deploy_app(pizza_app)
    wait_for_condition(pizza_is_served)
def test_deploy_app_runtime_env(self, client: ServeControllerClient):
    """Deploy from a remote working_dir, then override Adder's runtime_env.

    The app is first deployed as-is and must answer "0 pizzas please!" to
    ["ADD", 2]; after redeploying with an env_vars override on the Adder
    deployment the same request must answer "3 pizzas please!".
    """

    url = "http://localhost:8000/"

    def add_two_reply_is(expected):
        return requests.post(url, json=["ADD", 2]).json() == expected

    remote_working_dir = (
        "https://github.com/ray-project/test_dag/archive/"
        "76a741f6de31df78411b1f302071cde46f098418.zip"
    )
    app_config = {
        "import_path": "conditional_dag.serve_dag",
        "runtime_env": {"working_dir": remote_working_dir},
    }

    client.deploy_app(ServeApplicationSchema.parse_obj(app_config))
    wait_for_condition(lambda: add_two_reply_is("0 pizzas please!"))

    # Override the configuration: give Adder its own runtime_env env_vars.
    adder_override = {
        "name": "Adder",
        "ray_actor_options": {
            "runtime_env": {"env_vars": {"override_increment": "1"}}
        },
    }
    app_config["deployments"] = [adder_override]

    client.deploy_app(ServeApplicationSchema.parse_obj(app_config))
    wait_for_condition(lambda: add_two_reply_is("3 pizzas please!"))
def test_deploy_app_update_config(self, client: ServeControllerClient):
    """Redeploy with a new Adder user_config and check the live app uses it."""

    url = "http://localhost:8000/"

    def add_two():
        return requests.post(url, json=["ADD", 2]).json()

    # Baseline deployment straight from the test config.
    client.deploy_app(ServeApplicationSchema.parse_obj(self.get_test_config()))
    wait_for_condition(lambda: add_two() == "4 pizzas please!")

    # Same app, but Adder now gets an explicit increment of -1.
    updated = self.get_test_config()
    updated["deployments"] = [
        {"name": "Adder", "user_config": {"increment": -1}},
    ]
    client.deploy_app(ServeApplicationSchema.parse_obj(updated))
    wait_for_condition(lambda: add_two() == "1 pizzas please!")
def test_deploy_app_with_overriden_config(self, client: ServeControllerClient):
    """Deploy with user_config overrides for both Multiplier and Adder.

    Each route must reflect its override: ["ADD", 0] yields 5 and
    ["MUL", 2] yields 8.
    """

    url = "http://localhost:8000/"

    def reply_to(payload):
        return requests.post(url, json=payload).json()

    multiplier_override = {"name": "Multiplier", "user_config": {"factor": 4}}
    adder_override = {"name": "Adder", "user_config": {"increment": 5}}

    app_config = self.get_test_config()
    app_config["deployments"] = [multiplier_override, adder_override]
    client.deploy_app(ServeApplicationSchema.parse_obj(app_config))

    wait_for_condition(lambda: reply_to(["ADD", 0]) == "5 pizzas please!")
    wait_for_condition(lambda: reply_to(["MUL", 2]) == "8 pizzas please!")
def test_deploy_app_update_timestamp(self, client: ServeControllerClient):
    """Check that deployment_timestamp starts at 0 and grows on each deploy."""

    def current_timestamp():
        return client.get_serve_status().app_status.deployment_timestamp

    # Nothing has been deployed yet.
    assert current_timestamp() == 0

    client.deploy_app(ServeApplicationSchema.parse_obj(self.get_test_config()))
    assert current_timestamp() > 0
    first_deploy_time = current_timestamp()

    # Presumably here so the second deploy gets a strictly later timestamp.
    time.sleep(0.1)

    scaled_config = self.get_test_config()
    scaled_config["deployments"] = [{"name": "Adder", "num_replicas": 2}]
    client.deploy_app(ServeApplicationSchema.parse_obj(scaled_config))

    assert current_timestamp() > first_deploy_time

    # The redeploy may or may not have finished by now; both states are fine.
    acceptable_states = {ApplicationStatus.DEPLOYING, ApplicationStatus.RUNNING}
    assert client.get_serve_status().app_status.status in acceptable_states
def test_deploy_app_update_num_replicas(self, client: ServeControllerClient):
    """Scale Adder and Multiplier up and check replies and named-actor count."""

    url = "http://localhost:8000/"

    def reply_to(payload):
        return requests.post(url, json=payload).json()

    def named_actors():
        return ray.util.list_named_actors(all_namespaces=True)

    # Baseline deployment from the unmodified test config.
    client.deploy_app(ServeApplicationSchema.parse_obj(self.get_test_config()))
    wait_for_condition(lambda: reply_to(["ADD", 2]) == "4 pizzas please!")
    wait_for_condition(lambda: reply_to(["MUL", 3]) == "9 pizzas please!")

    actors = named_actors()

    # Redeploy with more replicas and new user_config values for both
    # deployments.
    small_cpu_share = {"num_cpus": 0.1}
    scaled_config = self.get_test_config()
    scaled_config["deployments"] = [
        {
            "name": "Adder",
            "num_replicas": 2,
            "user_config": {"increment": 0},
            "ray_actor_options": dict(small_cpu_share),
        },
        {
            "name": "Multiplier",
            "num_replicas": 3,
            "user_config": {"factor": 0},
            "ray_actor_options": dict(small_cpu_share),
        },
    ]
    client.deploy_app(ServeApplicationSchema.parse_obj(scaled_config))

    wait_for_condition(lambda: reply_to(["ADD", 2]) == "2 pizzas please!")
    wait_for_condition(lambda: reply_to(["MUL", 3]) == "0 pizzas please!")

    def app_is_running():
        status = client.get_serve_status().app_status.status
        return status == ApplicationStatus.RUNNING

    wait_for_condition(app_is_running, timeout=15)

    # Presumably one extra Adder replica plus two extra Multiplier replicas;
    # confirm against the replica counts in the default test config.
    updated_actors = named_actors()
    assert len(updated_actors) == len(actors) + 3
def test_deploy_app_basic(self, client: ServeControllerClient):
    """Deploy the test config and wait for both ADD and MUL to answer."""

    url = "http://localhost:8000/"

    def reply_to(payload):
        return requests.post(url, json=payload).json()

    app_config = ServeApplicationSchema.parse_obj(self.get_test_config())
    client.deploy_app(app_config)

    wait_for_condition(lambda: reply_to(["ADD", 2]) == "4 pizzas please!")
    wait_for_condition(lambda: reply_to(["MUL", 3]) == "9 pizzas please!")
def _connect(
    _override_controller_namespace: Optional[str] = None,
) -> ServeControllerClient:
    """Connect to an existing Serve instance on this Ray cluster.

    If calling from the driver program, the Serve instance on this Ray cluster
    must first have been initialized using `serve.start(detached=True)`.

    If called from within a replica, this will connect to the same Serve
    instance that the replica is running in.

    Args:
        _override_controller_namespace (Optional[str]): The namespace to use
            when looking for the controller. If None, Serve recalculates the
            controller's namespace using get_controller_namespace().

    Returns:
        ServeControllerClient wrapping the handle of the controller actor
        that was found. The client is also registered as the global client.

    Raises:
        RayServeException: if there is no Serve controller actor in the
            expected namespace.
    """

    # Initialize ray if needed.
    ray.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace="serve")

    # When running inside of a replica, _INTERNAL_REPLICA_CONTEXT is set to
    # ensure that the correct instance is connected to.
    if _INTERNAL_REPLICA_CONTEXT is None:
        controller_name = SERVE_CONTROLLER_NAME
        controller_namespace = get_controller_namespace(
            detached=True,
            _override_controller_namespace=_override_controller_namespace,
        )
    else:
        controller_name = _INTERNAL_REPLICA_CONTEXT._internal_controller_name
        controller_namespace = (
            _INTERNAL_REPLICA_CONTEXT._internal_controller_namespace
        )

    # Try to get serve controller if it exists
    try:
        controller = ray.get_actor(controller_name, namespace=controller_namespace)
    except ValueError as err:
        # Keep the failed actor lookup as the explicit cause of the
        # user-facing error instead of relying on implicit chaining.
        raise RayServeException(
            "There is no "
            "instance running on this Ray cluster. Please "
            "call `serve.start(detached=True)` to start "
            "one."
        ) from err

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=True,
        _override_controller_namespace=_override_controller_namespace,
    )
    set_global_client(client)
    return client
def _connect() -> ServeControllerClient:
    """Connect to an existing Serve application on this Ray cluster.

    If calling from the driver program, the Serve app on this Ray cluster
    must first have been initialized using `serve.start(detached=True)`.

    If called from within a replica, this will connect to the same Serve
    app that the replica is running in.

    Returns:
        ServeControllerClient that encapsulates a Ray actor handle to the
        existing Serve application's Serve Controller. The client is also
        registered as the global client.

    Raises:
        RayServeException: if there is no running Serve controller actor.
    """

    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    # When running inside of a replica, _INTERNAL_REPLICA_CONTEXT is set to
    # ensure that the correct instance is connected to.
    if _INTERNAL_REPLICA_CONTEXT is None:
        controller_name = SERVE_CONTROLLER_NAME
    else:
        controller_name = _INTERNAL_REPLICA_CONTEXT._internal_controller_name

    # Try to get serve controller if it exists
    try:
        controller = ray.get_actor(controller_name, namespace=SERVE_NAMESPACE)
    except ValueError as err:
        # Keep the failed actor lookup as the explicit cause of the
        # user-facing error instead of relying on implicit chaining.
        raise RayServeException(
            "There is no "
            "instance running on this Ray cluster. Please "
            "call `serve.start(detached=True)` to start "
            "one."
        ) from err

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=True,
    )
    set_global_client(client)
    return client
def test_controller_recover_and_deploy(self, client: ServeControllerClient):
    """Ensure that in-progress deploy can finish even after controller dies."""

    url = "http://localhost:8000/"

    def wait_until_app_answers():
        # Both routes have to respond with the values of the test config.
        wait_for_condition(
            lambda: requests.post(url, json=["ADD", 2]).json()
            == "4 pizzas please!"
        )
        wait_for_condition(
            lambda: requests.post(url, json=["MUL", 3]).json()
            == "9 pizzas please!"
        )

    client.deploy_app(ServeApplicationSchema.parse_obj(self.get_test_config()))

    # Wait for app to deploy
    wait_until_app_answers()
    timestamp_before_kill = (
        client.get_serve_status().app_status.deployment_timestamp
    )

    # Delete all deployments, but don't update config
    deployment_names = ["Router", "Multiplier", "Adder", "create_order", "DAGDriver"]
    client.delete_deployments(deployment_names)
    ray.kill(client._controller, no_restart=False)

    # When controller restarts, it should redeploy config automatically
    wait_until_app_answers()
    timestamp_after_recovery = (
        client.get_serve_status().app_status.deployment_timestamp
    )
    assert timestamp_before_kill == timestamp_after_recovery

    serve.shutdown()
    restarted_client = serve.start(detached=True)

    # Ensure config checkpoint has been deleted
    assert restarted_client.get_serve_status().app_status.deployment_timestamp == 0
def start(
    detached: bool = False,
    http_options: Optional[Union[dict, HTTPOptions]] = None,
    dedicated_cpu: bool = False,
    _checkpoint_path: str = DEFAULT_CHECKPOINT_PATH,
    **kwargs,
) -> ServeControllerClient:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or when the script exits). If detached is set to True, the
    instance will instead persist until serve.shutdown() is called. This is
    only relevant if connecting to a long-running Ray cluster (e.g., with
    ray.init(address="auto") or ray.init("ray://<remote_addr>")).

    If a global client with a healthy controller already exists, that client
    is returned instead of starting a new instance.

    Args:
        detached: Whether or not the instance should be detached from this
          script. If set, the instance will live on the Ray cluster until it is
          explicitly stopped with serve.shutdown().
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration options
          for HTTP proxy. You can pass in a dictionary or HTTPOptions object
          with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults to
              "127.0.0.1". To expose Serve publicly, you probably want to set
              this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - root_path(str): Root path to mount the serve application
              (for example, "/serve"). All deployment routes will be prefixed
              with this path. Defaults to "".
            - middlewares(list): A list of Starlette middlewares that will be
              applied to the HTTP servers in the cluster. Defaults to [].
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed serve.start
                  on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.
            - num_cpus (int): The number of CPU cores to reserve for each
              internal Serve HTTP proxy actor.  Defaults to 0.
        dedicated_cpu: Whether to reserve a CPU core for the internal
          Serve controller actor.  Defaults to False.
        _checkpoint_path: Checkpoint path handed to the controller when a new
          instance is created, and compared against an already running
          instance via _check_http_and_checkpoint_options().
        **kwargs: Only inspected for the deprecated "http_host", "http_port"
          and "http_middlewares" names; any other keys are not used here.

    Returns:
        ServeControllerClient for the existing or newly started instance.

    Raises:
        ValueError: if a deprecated http_* keyword argument is passed.
        TimeoutError: if the HTTP proxies are not ready within
          HTTP_PROXY_TIMEOUT seconds.
    """
    usage_lib.record_library_usage("serve")

    deprecated_prefix = "http_"
    http_deprecated_args = ["http_host", "http_port", "http_middlewares"]
    for key in http_deprecated_args:
        if key in kwargs:
            # The HTTPOptions fields are named without the "http_" prefix
            # (host, port, middlewares), so suggest the real option name and
            # show the value with repr() so the hint is valid Python.
            option_name = key[len(deprecated_prefix):]
            raise ValueError(
                f"{key} is deprecated, please use serve.start(http_options="
                f'{{"{option_name}": {kwargs[key]!r}}}) instead.'
            )

    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    try:
        client = get_global_client(_health_check_controller=True)
        logger.info(
            f'Connecting to existing Serve app in namespace "{SERVE_NAMESPACE}".'
        )

        _check_http_and_checkpoint_options(client, http_options, _checkpoint_path)
        return client
    except RayServeException:
        # No usable existing instance: fall through and start a new one.
        pass

    if detached:
        controller_name = SERVE_CONTROLLER_NAME
    else:
        # Non-detached controllers get a randomized name.
        controller_name = format_actor_name(
            get_random_letters(), SERVE_CONTROLLER_NAME
        )

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions()

    controller = ServeController.options(
        num_cpus=1 if dedicated_cpu else 0,
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={get_current_node_resource_key(): 0.01},
        namespace=SERVE_NAMESPACE,
        max_concurrency=CONTROLLER_MAX_CONCURRENCY,
    ).remote(
        controller_name,
        http_options,
        _checkpoint_path,
        detached=detached,
    )

    # Block until every HTTP proxy reports ready (bounded by the timeout).
    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if len(proxy_handles) > 0:
        try:
            ray.get(
                [handle.ready.remote() for handle in proxy_handles.values()],
                timeout=HTTP_PROXY_TIMEOUT,
            )
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                f"HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s."
            )

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=detached,
    )
    set_global_client(client)
    logger.info(
        f"Started{' detached ' if detached else ' '}Serve instance in "
        f'namespace "{SERVE_NAMESPACE}".'
    )
    return client