def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    """Build a Deployment from the settings carried by a DeploymentSchema.

    The schema's ``import_path`` is passed through as the deployment's
    ``func_or_class``. Config-related fields are forwarded to
    ``DeploymentConfig.from_default`` with ``ignore_none=True``.

    Args:
        s: Schema describing the deployment.

    Returns:
        A Deployment constructed with empty init args and kwargs.
    """
    # Convert the actor options to a plain dict, excluding unset fields.
    actor_options = (
        s.ray_actor_options.dict(exclude_unset=True)
        if s.ray_actor_options is not None
        else None
    )

    config_overrides = dict(
        num_replicas=s.num_replicas,
        user_config=s.user_config,
        max_concurrent_queries=s.max_concurrent_queries,
        autoscaling_config=s.autoscaling_config,
        graceful_shutdown_wait_loop_s=s.graceful_shutdown_wait_loop_s,
        graceful_shutdown_timeout_s=s.graceful_shutdown_timeout_s,
        health_check_period_s=s.health_check_period_s,
        health_check_timeout_s=s.health_check_timeout_s,
    )
    deployment_config = DeploymentConfig.from_default(
        ignore_none=True, **config_overrides
    )

    return Deployment(
        func_or_class=s.import_path,
        name=s.name,
        config=deployment_config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=actor_options,
        _internal=True,
    )
def test_from_default(self, ignore_none):
    """Exercise from_default() with valid, unknown, and invalid arguments."""
    # Recognized keywords are applied to the resulting config.
    config = DeploymentConfig.from_default(
        ignore_none=ignore_none,
        num_replicas=5,
        is_cross_language=True,
    )
    assert config.num_replicas == 5
    assert config.is_cross_language is True

    # An unrecognized keyword must be rejected with a TypeError.
    unknown_keyword_args = {"num_replicas": 5, "is_xlang": True}
    with pytest.raises(TypeError):
        DeploymentConfig.from_default(
            ignore_none=ignore_none, **unknown_keyword_args
        )

    # Field validation still applies to recognized keywords.
    with pytest.raises(ValidationError):
        DeploymentConfig.from_default(
            ignore_none=ignore_none, num_replicas="hello world"
        )
def test_from_default_ignore_none(self):
    """Check from_default()'s ignore_none parameter."""
    default = DeploymentConfig()

    # A valid parameter passed as None should be ignored, so the field keeps
    # its default value. (This result was previously assigned but never
    # checked before being overwritten below.)
    dc = DeploymentConfig.from_default(ignore_none=True, num_replicas=None)
    assert dc.num_replicas == default.num_replicas

    # Invalid parameter should raise TypeError no matter what
    with pytest.raises(TypeError):
        DeploymentConfig.from_default(ignore_none=True, fake=5)
    with pytest.raises(TypeError):
        DeploymentConfig.from_default(ignore_none=False, fake=5)

    # Validators should run no matter what
    dc = DeploymentConfig.from_default(
        ignore_none=True, max_concurrent_queries=None
    )
    assert dc.max_concurrent_queries == default.max_concurrent_queries

    dc = DeploymentConfig.from_default(
        ignore_none=False, max_concurrent_queries=None
    )
    assert dc.max_concurrent_queries is not None
    assert dc.max_concurrent_queries == default.max_concurrent_queries
def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    """Create a deployment from the parameters specified in a schema.

    The returned deployment CANNOT be deployed immediately. Its
    func_or_class value is an empty string (""), which is not a valid
    import path. The func_or_class value must be overwritten with a valid
    function or class before the deployment can be deployed.
    """
    # Default to no actor options; otherwise convert them to a plain dict,
    # excluding unset fields.
    actor_options = None
    if s.ray_actor_options is not None:
        actor_options = s.ray_actor_options.dict(exclude_unset=True)

    deployment_config = DeploymentConfig.from_default(
        ignore_none=True,
        **{
            "num_replicas": s.num_replicas,
            "user_config": s.user_config,
            "max_concurrent_queries": s.max_concurrent_queries,
            "autoscaling_config": s.autoscaling_config,
            "graceful_shutdown_wait_loop_s": s.graceful_shutdown_wait_loop_s,
            "graceful_shutdown_timeout_s": s.graceful_shutdown_timeout_s,
            "health_check_period_s": s.health_check_period_s,
            "health_check_timeout_s": s.health_check_timeout_s,
        },
    )

    # func_or_class is deliberately a placeholder; see the docstring.
    return Deployment(
        func_or_class="",
        name=s.name,
        config=deployment_config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=actor_options,
        _internal=True,
    )
def deployment(
    _func_or_class: Optional[Callable] = None,
    name: Optional[str] = None,
    version: Optional[str] = None,
    num_replicas: Optional[int] = None,
    init_args: Optional[Tuple[Any]] = None,
    init_kwargs: Optional[Dict[Any, Any]] = None,
    route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE,
    ray_actor_options: Optional[Dict] = None,
    user_config: Optional[Any] = None,
    max_concurrent_queries: Optional[int] = None,
    _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
    _graceful_shutdown_wait_loop_s: Optional[float] = None,
    _graceful_shutdown_timeout_s: Optional[float] = None,
    _health_check_period_s: Optional[float] = None,
    _health_check_timeout_s: Optional[float] = None,
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Works both as a bare decorator (``@serve.deployment``) and as a
    parametrized one (``@serve.deployment(name=...)``).

    Args:
        name (Optional[str]): Globally-unique name identifying this
            deployment. If not provided, the name of the class or function
            will be used.
        version (Optional[str]): Version of the deployment. This is used to
            indicate a code change for the deployment; when it is
            re-deployed with a version change, a rolling update of the
            replicas will be performed. If not provided, every deployment
            will be treated as a new version.
        num_replicas (Optional[int]): The number of processes to start up
            that will handle requests to this deployment. Defaults to 1.
            Must not be 0, and must not be set together with
            ``_autoscaling_config``.
        init_args (Optional[Tuple]): Positional args to be passed to the
            class constructor when starting up deployment replicas. These
            can also be passed when you call `.deploy()` on the returned
            Deployment.
        init_kwargs (Optional[Dict]): Keyword args to be passed to the
            class constructor when starting up deployment replicas. These
            can also be passed when you call `.deploy()` on the returned
            Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP
            path prefix will be routed to this deployment. Defaults to
            '/{name}'. When set to 'None', no HTTP endpoint will be
            created. Routing is done based on longest-prefix match, so if
            you have deployment A with a prefix of '/a' and deployment B
            with a prefix of '/a/b', requests to '/a', '/a/', and '/a/c' go
            to A and requests to '/a/b', '/a/b/', and '/a/b/c' go to B.
            Routes must not end with a '/' unless they're the root (just
            '/'), which acts as a catch-all.
        ray_actor_options: Options to be passed to the Ray actor
            constructor such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable
            to keep track of updates, so it must only contain hashable
            types, or hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of
            queries that will be sent to a replica of this deployment
            without receiving a response. Defaults to 100.
        _autoscaling_config, _graceful_shutdown_wait_loop_s,
        _graceful_shutdown_timeout_s, _health_check_period_s,
        _health_check_timeout_s: Forwarded to the deployment's config under
            the same names without the leading underscore.

    Example:
    >>> from ray import serve
    >>> @serve.deployment(name="deployment1", version="v1") # doctest: +SKIP
    ... class MyDeployment: # doctest: +SKIP
    ...     pass # doctest: +SKIP

    >>> MyDeployment.deploy(*init_args) # doctest: +SKIP
    >>> MyDeployment.options( # doctest: +SKIP
    ...     num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment when applied directly to a callable; otherwise a
        decorator that produces a Deployment from the callable it wraps.

    Raises:
        ValueError: If ``num_replicas`` is 0, or if both ``num_replicas``
            and ``_autoscaling_config`` are provided.
    """
    # TODO(Sihan) separate num_replicas attribute from internal and api
    # A replica count of zero is rejected up front.
    if num_replicas == 0:
        raise ValueError("num_replicas is expected to larger than 0")

    # An explicit replica count and an autoscaling config are mutually
    # exclusive.
    replicas_given = num_replicas is not None
    autoscaling_given = _autoscaling_config is not None
    if replicas_given and autoscaling_given:
        raise ValueError(
            "Manually setting num_replicas is not allowed when "
            "_autoscaling_config is provided."
        )

    config_overrides = {
        "num_replicas": num_replicas,
        "user_config": user_config,
        "max_concurrent_queries": max_concurrent_queries,
        "autoscaling_config": _autoscaling_config,
        "graceful_shutdown_wait_loop_s": _graceful_shutdown_wait_loop_s,
        "graceful_shutdown_timeout_s": _graceful_shutdown_timeout_s,
        "health_check_period_s": _health_check_period_s,
        "health_check_timeout_s": _health_check_timeout_s,
    }
    config = DeploymentConfig.from_default(
        ignore_none=True, **config_overrides
    )

    def decorator(_func_or_class):
        # Fall back to the wrapped object's own name when none was given.
        if name is None:
            deployment_name = _func_or_class.__name__
        else:
            deployment_name = name

        return Deployment(
            _func_or_class,
            deployment_name,
            config,
            version=version,
            init_args=init_args,
            init_kwargs=init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    if callable(_func_or_class):
        return decorator(_func_or_class)
    return decorator