def update_backend_config(
        self,
        backend_tag: str,
        config_options: Union[BackendConfig, Dict[str, Any]]) -> None:
    """Update the configuration of an already-registered backend.

    Options that are not specified in ``config_options`` are left
    unchanged.

    Args:
        backend_tag(str): A registered backend.
        config_options(dict, serve.BackendConfig): Backend config options
            to update. Either a BackendConfig object or a dict mapping
            strings to values for the following supported options:
            - "num_replicas": number of processes to start up that
            will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
            be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
            will wait for a full batch of requests before
            processing a partial batch.
            - "max_concurrent_queries": the maximum number of queries
            that will be sent to a replica of this backend
            without receiving a response.
            - "user_config" (experimental): Arguments to pass to the
            reconfigure method of the backend. The reconfigure method is
            called if "user_config" is not None.

    Raises:
        TypeError: if ``config_options`` is neither a BackendConfig nor a
            dict.
    """
    if isinstance(config_options, dict):
        # Plain dicts are converted to a BackendConfig before being sent.
        new_config = BackendConfig.parse_obj(config_options)
    elif isinstance(config_options, BackendConfig):
        new_config = config_options
    else:
        raise TypeError(
            "config_options must be a BackendConfig or dictionary.")

    # Block until the controller has processed the update.
    update_ref = self._controller.update_backend_config.remote(
        backend_tag, new_config)
    ray.get(update_ref)
def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    """Deploy ``backend_def`` under ``name`` via the Serve controller.

    Args:
        name (str): name passed to the controller for this deployment.
        backend_def (callable, class, str): the backend definition handed
            to ``ReplicaConfig``.
        *init_args (optional): positional arguments forwarded to
            ``ReplicaConfig`` together with ``backend_def``.
        ray_actor_options (optional): actor options forwarded to
            ``ReplicaConfig``. The dict passed in is not modified; a copy
            is made before defaults are injected.
        config (dict, serve.BackendConfig, optional): backend
            configuration. Defaults to an empty dict.
        version (str, optional): forwarded unchanged to the controller.
        _blocking (bool, optional): when true (the default), wait for the
            deployment goal before returning.

    Returns:
        ``None`` when ``_blocking`` is true; otherwise the reference
        returned by the controller's ``deploy`` call.

    Raises:
        TypeError: if ``config`` is neither a BackendConfig nor a dict.
    """
    if config is None:
        config = {}

    # Work on a copy: the defaults injected below must not leak into the
    # dict owned by the caller (who may reuse it for other deployments).
    ray_actor_options = ({} if ray_actor_options is None else
                         dict(ray_actor_options))

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster.)
    if not ray.util.client.ray.is_connected():
        runtime_env = ray_actor_options.get("runtime_env")
        # Copy the nested dict as well so the caller's runtime_env is
        # left untouched.
        runtime_env = {} if runtime_env is None else dict(runtime_env)
        if runtime_env.get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env:
                runtime_env["conda"] = current_env
        ray_actor_options["runtime_env"] = runtime_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking,
        is_asgi_app=replica_config.is_asgi_app,
        path_prefix=replica_config.path_prefix,
    )

    # Attach the metadata derived from the replica config to the user's
    # configuration, whichever form it was supplied in.
    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(
            update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()

    goal_ref = self._controller.deploy.remote(name, backend_config,
                                              replica_config, version)

    if _blocking:
        self._wait_for_goal(goal_ref)
    else:
        return goal_ref
def create_backend(self,
                   backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
                   env: Optional[CondaEnv] = None) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag assign to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class.
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor. The dict passed
            in is not modified; a copy is made before the conda
            environment is injected.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that
            will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
            be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
            will wait for a full batch of requests before processing a
            partial batch.
            - "max_concurrent_queries": the maximum number of queries that
            will be sent to a replica of this backend without receiving a
            response.
            - "user_config" (experimental): Arguments to pass to the
            reconfigure method of the backend. The reconfigure method is
            called if "user_config" is not None.
        env (serve.CondaEnv, optional): conda environment to run this
            backend in.  Requires the caller to be running in an activated
            conda environment (not necessarily ``env``), and requires
            ``env`` to be an existing conda environment on all nodes.  If
            ``env`` is not provided but conda is activated, the backend
            will run in the conda environment of the caller.

    Raises:
        ValueError: if ``backend_tag`` is already registered.
        TypeError: if ``config`` is neither a BackendConfig nor a dict.
    """
    if backend_tag in self.list_backends().keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}

    # Work on a copy so the PYTHONHOME override injected below does not
    # leak into the dict owned by the caller.
    ray_actor_options = ({} if ray_actor_options is None else
                         dict(ray_actor_options))

    if env is None:
        # If conda is activated, default to conda env of this process.
        python_home = os.environ.get("CONDA_PREFIX")
        set_python_home = bool(python_home)
    else:
        python_home = get_conda_env_dir(env.name)
        set_python_home = True

    if set_python_home:
        # Merge with (a copy of) any overrides the user supplied instead of
        # replacing them, so an explicit ``env`` no longer drops unrelated
        # user-provided environment variables.
        env_overrides = dict(
            ray_actor_options.get("override_environment_variables") or {})
        env_overrides["PYTHONHOME"] = python_home
        ray_actor_options["override_environment_variables"] = env_overrides

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    # Attach the metadata derived from the replica config to the user's
    # configuration, whichever form it was supplied in.
    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(
            update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    # Block until the controller has processed the request.
    ray.get(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))
def create_backend(
        self,
        backend_tag: str,
        func_or_class: Union[Callable, Type[Callable]],
        *actor_init_args: Any,
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
) -> None:
    """Register a new backend under ``backend_tag``.

    Requests routed to the backend are served by ``func_or_class``.

    Args:
        backend_tag (str): a unique tag assign to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class.
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of worker processes to start up that
            will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
            be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
            will wait for a full batch of requests before processing a
            partial batch.
            - "max_concurrent_queries": the maximum number of queries that
            will be sent to a replica of this backend without receiving a
            response.

    Raises:
        ValueError: if ``backend_tag`` is already registered.
        TypeError: if ``config`` is neither a BackendConfig nor a dict.
    """
    if backend_tag in self.list_backends():
        duplicate_msg = ("Cannot create backend. "
                         "Backend '{}' is already registered.")
        raise ValueError(duplicate_msg.format(backend_tag))

    user_config = {} if config is None else config

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    internal_metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    # Merge the metadata derived from the replica config into the user's
    # configuration, whichever form it was supplied in.
    if isinstance(user_config, dict):
        merged_options = {**user_config, "internal_metadata": internal_metadata}
        backend_config = BackendConfig.parse_obj(merged_options)
    elif isinstance(user_config, BackendConfig):
        backend_config = user_config.copy(
            update={"internal_metadata": internal_metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()

    # Block until the controller has processed the request.
    creation_ref = self._controller.create_backend.remote(
        backend_tag, backend_config, replica_config)
    ray.get(creation_ref)
def create_backend(
        self,
        backend_tag: str,
        backend_def: Union[Callable, Type[Callable], str],
        *init_args: Any,
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
) -> None:
    """Create a backend with the provided tag.

    Args:
        backend_tag (str): a unique tag assign to identify this backend.
        backend_def (callable, class, str): a function or class
            implementing __call__ and returning a JSON-serializable object
            or a Starlette Response object. A string import path can also
            be provided (e.g., "my_module.MyClass"), in which case the
            underlying function or class will be imported dynamically in
            the worker replicas.
        *init_args (optional): the arguments to pass to the class
            initialization method. Not valid if backend_def is a function.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor. The dict passed
            in is not modified; a copy is made before defaults are
            injected.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that
            will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
            be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
            will wait for a full batch of requests before processing a
            partial batch.
            - "max_concurrent_queries": the maximum number of queries that
            will be sent to a replica of this backend without receiving a
            response.
            - "user_config" (experimental): Arguments to pass to the
            reconfigure method of the backend. The reconfigure method is
            called if "user_config" is not None.

    Raises:
        ValueError: if ``backend_tag`` is already registered.
        TypeError: if ``config`` is neither a BackendConfig nor a dict.
    """
    if backend_tag in self.list_backends(_internal=True).keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}

    # Work on a copy: the defaults injected below must not leak into the
    # dict owned by the caller (who may reuse it for other backends).
    ray_actor_options = ({} if ray_actor_options is None else
                         dict(ray_actor_options))

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster.)
    if not ray.util.client.ray.is_connected():
        runtime_env = ray_actor_options.get("runtime_env")
        # Copy the nested dict as well so the caller's runtime_env is
        # left untouched.
        runtime_env = {} if runtime_env is None else dict(runtime_env)
        if runtime_env.get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env:
                runtime_env["conda"] = current_env
        ray_actor_options["runtime_env"] = runtime_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    # Attach the metadata derived from the replica config to the user's
    # configuration, whichever form it was supplied in.
    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(
            update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    # Wait for the goal returned by the controller's create_backend call.
    self._wait_for_goal(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))
def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           prev_version: Optional[str] = None,
           route_prefix: Optional[str] = None,
           url: str = "",
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    """Deploy ``backend_def`` under ``name`` via the Serve controller.

    Args:
        name (str): name of the deployment; passed to the controller and
            used in log messages.
        backend_def (callable, class, str): the backend definition handed
            to ``ReplicaConfig``.
        *init_args (optional): positional arguments forwarded to
            ``ReplicaConfig`` together with ``backend_def``.
        ray_actor_options (optional): actor options forwarded to
            ``ReplicaConfig``. The dict passed in is not modified; a copy
            is made before the runtime environment is filled in.
        config (dict, serve.BackendConfig, optional): backend
            configuration. Defaults to an empty dict.
        version (str, optional): forwarded to the controller and included
            in log messages.
        prev_version (str, optional): forwarded unchanged to the
            controller.
        route_prefix (str, optional): forwarded unchanged to the
            controller.
        url (str): only used in the "is ready" log message.
        _blocking (bool, optional): when true (the default), wait for the
            deployment goal before returning.

    Returns:
        ``None`` when ``_blocking`` is true; otherwise the goal id
        returned by the controller.

    Raises:
        TypeError: if ``config`` is neither a BackendConfig nor a dict.
    """
    if config is None:
        config = {}

    # Work on a copy: the runtime_env handling below must not leak into the
    # dict owned by the caller.
    ray_actor_options = ({} if ray_actor_options is None else
                         dict(ray_actor_options))

    runtime_context = ray.get_runtime_context()
    curr_job_env = runtime_context.runtime_env

    user_runtime_env = ray_actor_options.get("runtime_env")
    if user_runtime_env is not None:
        # Copy before filling in the job's URIs so the caller's nested dict
        # is left untouched.
        runtime_env = dict(user_runtime_env)
        runtime_env.setdefault("uris", curr_job_env.get("uris"))
    else:
        # No runtime_env supplied: inherit the job's, but as a copy so the
        # deletion below does not alter the object returned by the runtime
        # context.
        runtime_env = dict(curr_job_env)
    # "working_dir" is never forwarded to the replicas' runtime_env.
    runtime_env.pop("working_dir", None)
    ray_actor_options["runtime_env"] = runtime_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj(config)
    elif isinstance(config, BackendConfig):
        backend_config = config
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    # The controller reports the goal to wait on and whether this call
    # actually triggered an update.
    goal_id, updating = ray.get(
        self._controller.deploy.remote(
            name, backend_config.to_proto_bytes(), replica_config, version,
            prev_version, route_prefix, runtime_context.job_id))

    tag = f"component=serve deployment={name}"

    if updating:
        msg = f"Updating deployment '{name}'"
        if version is not None:
            msg += f" to version '{version}'"
        logger.info(f"{msg}. {tag}")
    else:
        logger.info(f"Deployment '{name}' is already at version "
                    f"'{version}', not updating. {tag}")

    if _blocking:
        self._wait_for_goal(goal_id)
        logger.info(
            f"Deployment '{name}{':'+version if version else ''}' is ready"
            f" at `{url}`. {tag}")
    else:
        return goal_id