def test_replica_config_default_memory_none(memory_omitted_options):
    """Checks that ReplicaConfig's default memory is None."""
    if memory_omitted_options is None:
        config = ReplicaConfig("fake.import_path")
        assert config.ray_actor_options["memory"] is None

    config = ReplicaConfig(
        "fake.import_path", ray_actor_options=memory_omitted_options)
    assert config.ray_actor_options["memory"] is None

def test_controller_starts_java_replica(shutdown_only):  # noqa: F811
    ray.init(
        num_cpus=8,
        namespace="default_test_namespace",
        # A dummy code search path to enable cross language.
        job_config=JobConfig(code_search_path=["."]),
    )
    client = serve.start(detached=True)
    controller = client._controller

    config = DeploymentConfig()
    config.deployment_language = JAVA
    config.is_cross_language = True

    replica_config = ReplicaConfig(
        "io.ray.serve.util.ExampleEchoDeployment",
        init_args=["my_prefix "],
    )

    # Deploy it.
    deployment_name = "my_java"
    updating = ray.get(
        controller.deploy.remote(
            name=deployment_name,
            deployment_config_proto_bytes=config.to_proto_bytes(),
            replica_config_proto_bytes=replica_config.to_proto_bytes(),
            route_prefix=None,
            deployer_job_id=ray.get_runtime_context().job_id,
        )
    )
    assert updating
    client._wait_for_deployment_healthy(deployment_name)

    # Let's try to call it!
    all_handles = ray.get(controller._all_running_replicas.remote())
    backend_handle = all_handles["my_java"][0].actor_handle
    out = backend_handle.handleRequest.remote(
        RequestMetadata(
            request_id="id-1",
            endpoint="endpoint",
            call_method="call",
        ).SerializeToString(),
        RequestWrapper(body=msgpack_serialize("hello")).SerializeToString(),
    )
    assert ray.get(out) == "my_prefix hello"

    ray.get(controller.delete_deployment.remote(deployment_name))
    client._wait_for_deployment_deleted(deployment_name)

def from_proto(cls, proto: DeploymentInfoProto):
    deployment_config = (DeploymentConfig.from_proto(proto.deployment_config)
                         if proto.deployment_config else None)
    data = {
        "deployment_config": deployment_config,
        "replica_config": ReplicaConfig.from_proto(
            proto.replica_config,
            deployment_config.deployment_language
            if deployment_config else DeploymentLanguage.PYTHON,
        ),
        "start_time_ms": proto.start_time_ms,
        "actor_name": proto.actor_name if proto.actor_name != "" else None,
        "serialized_deployment_def": proto.serialized_deployment_def
        if proto.serialized_deployment_def != b"" else None,
        "version": proto.version if proto.version != "" else None,
        "end_time_ms": proto.end_time_ms if proto.end_time_ms != 0 else None,
        "deployer_job_id": ray.get_runtime_context().job_id,
    }
    return cls(**data)

def from_proto(cls, proto: DeploymentInfoProto):
    deployment_config = (DeploymentConfig.from_proto(proto.deployment_config)
                         if proto.deployment_config else None)
    data = {
        "deployment_config": deployment_config,
        "replica_config": ReplicaConfig.from_proto(
            proto.replica_config,
            deployment_config.needs_pickle() if deployment_config else True,
        ),
        "start_time_ms": proto.start_time_ms,
        "actor_name": proto.actor_name if proto.actor_name != "" else None,
        "version": proto.version if proto.version != "" else None,
        "end_time_ms": proto.end_time_ms if proto.end_time_ms != 0 else None,
        "deployer_job_id": ray.get_runtime_context().job_id,
    }
    return cls(**data)

def create_backend(backend_tag,
                   func_or_class,
                   *actor_init_args,
                   ray_actor_options=None,
                   config=None):
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag assigned to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (optional): configuration options for this backend.
    """
    if config is None:
        config = {}
    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    backend_config = BackendConfig(config, replica_config.accepts_batches)

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_config, replica_config)

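# A minimal usage sketch for the create_backend API above, assuming a running
# Serve instance with `create_backend` in scope; the backend tag and handler
# below are hypothetical illustrations, not names from the original codebase.
def echo_handler(request):
    # A trivial function backend implementing the callable interface.
    return "hello"

# "num_replicas" is one of the dict config options documented for this API.
create_backend("echo_backend", echo_handler, config={"num_replicas": 2})
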
def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster).
    if not ray.util.client.ray.is_connected():
        if ray_actor_options.get("runtime_env") is None:
            ray_actor_options["runtime_env"] = {}
        if ray_actor_options["runtime_env"].get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env is not None and current_env != "":
                ray_actor_options["runtime_env"]["conda"] = current_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking,
        is_asgi_app=replica_config.is_asgi_app,
        path_prefix=replica_config.path_prefix,
    )

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config,
            "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()

    goal_ref = self._controller.deploy.remote(name, backend_config,
                                              replica_config, version)

    if _blocking:
        self._wait_for_goal(goal_ref)
    else:
        return goal_ref

async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 version: Optional[str]) -> Optional[GoalId]:
    # By default the path prefix is the deployment name.
    if replica_config.path_prefix is None:
        replica_config.path_prefix = f"/{name}"
        # Backend config should be synchronized so the backend worker
        # is aware of it.
        backend_config.internal_metadata.path_prefix = f"/{name}"

    if replica_config.is_asgi_app:
        # When the backend is an ASGI application, we want to proxy it
        # with a prefixed path as well as proxy all HTTP methods.
        # {wildcard:path} is used so HTTPProxy's Starlette router can match
        # arbitrary paths.
        http_route = f"{replica_config.path_prefix}" + "/{wildcard:path}"
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
        http_methods = [
            "GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS",
            "TRACE", "PATCH"
        ]
    else:
        http_route = replica_config.path_prefix
        # Generic endpoints should support a limited subset of HTTP methods.
        http_methods = ["GET", "POST"]

    python_methods = []
    if inspect.isclass(replica_config.backend_def):
        for method_name, _ in inspect.getmembers(replica_config.backend_def,
                                                 inspect.isfunction):
            python_methods.append(method_name)

    async with self.write_lock:
        if version is None:
            version = RESERVED_VERSION_TAG
        else:
            if version == RESERVED_VERSION_TAG:
                # TODO(edoakes): this is unlikely to ever be hit, but it's
                # still ugly and should be removed once the old codepath
                # can be deleted.
                raise ValueError(
                    f"Version {RESERVED_VERSION_TAG} is reserved and "
                    "cannot be used by applications.")

        goal_id = self.backend_state.deploy_backend(name, backend_config,
                                                    replica_config, version)
        self.endpoint_state.create_endpoint(
            name,
            http_route,
            http_methods,
            TrafficPolicy({name: 1.0}),
            python_methods=python_methods)
        return goal_id

def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config_proto_bytes: bytes,
    route_prefix: Optional[str],
    deployer_job_id: Union["ray._raylet.JobID", bytes],
) -> bool:
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )
    version = deployment_config.version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, deployment_config.needs_pickle()
    )

    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas

        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
    else:
        autoscaling_policy = None

    if isinstance(deployer_job_id, bytes):
        deployer_job_id = ray.JobID.from_int(
            int.from_bytes(deployer_job_id, "little")
        )

    deployment_info = DeploymentInfo(
        actor_name=name,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    updating = self.deployment_state_manager.deploy(name, deployment_info)

    if route_prefix is not None:
        endpoint_info = EndpointInfo(route=route_prefix)
        self.endpoint_state.update_endpoint(name, endpoint_info)
    else:
        self.endpoint_state.delete_endpoint(name)

    return updating

async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 version: Optional[str]) -> Optional[GoalId]:
    # By default the path prefix is the deployment name.
    if replica_config.path_prefix is None:
        replica_config.path_prefix = f"/{name}"
        # Backend config should be synchronized so the backend worker
        # is aware of it.
        backend_config.internal_metadata.path_prefix = f"/{name}"
    else:
        if ("{" in replica_config.path_prefix
                or "}" in replica_config.path_prefix):
            raise ValueError(
                "Wildcard routes are not supported for deployment paths. "
                "Please use @serve.ingress with FastAPI instead.")

    if replica_config.is_asgi_app:
        # When the backend is an ASGI application, we want to proxy it
        # with a prefixed path as well as proxy all HTTP methods.
        # {wildcard:path} is used so HTTPProxy's Starlette router can match
        # arbitrary paths.
        path_prefix = replica_config.path_prefix
        if path_prefix.endswith("/"):
            path_prefix = path_prefix[:-1]
        http_route = path_prefix + WILDCARD_PATH_SUFFIX
        http_methods = ALL_HTTP_METHODS
    else:
        http_route = replica_config.path_prefix
        # Generic endpoints should support a limited subset of HTTP methods.
        http_methods = ["GET", "POST"]

    python_methods = []
    if inspect.isclass(replica_config.backend_def):
        for method_name, _ in inspect.getmembers(replica_config.backend_def,
                                                 inspect.isfunction):
            python_methods.append(method_name)

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(replica_config.backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)

        goal_id = self.backend_state.deploy_backend(name, backend_info)
        self.endpoint_state.create_endpoint(
            name,
            http_route,
            http_methods,
            TrafficPolicy({name: 1.0}),
            python_methods=python_methods)
        return goal_id

async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 version: Optional[str]) -> Optional[GoalId]:
    # By default the path prefix is the deployment name.
    if replica_config.path_prefix is None:
        replica_config.path_prefix = f"/{name}"
        # Backend config should be synchronized so the backend worker
        # is aware of it.
        backend_config.internal_metadata.path_prefix = f"/{name}"

    if replica_config.is_asgi_app:
        # When the backend is an ASGI application, we want to proxy it
        # with a prefixed path as well as proxy all HTTP methods.
        # {wildcard:path} is used so HTTPProxy's Starlette router can match
        # arbitrary paths.
        http_route = f"{replica_config.path_prefix}" + "/{wildcard:path}"
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
        http_methods = [
            "GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS",
            "TRACE", "PATCH"
        ]
    else:
        http_route = replica_config.path_prefix
        # Generic endpoints should support a limited subset of HTTP methods.
        http_methods = ["GET", "POST"]

    python_methods = []
    if inspect.isclass(replica_config.backend_def):
        for method_name, _ in inspect.getmembers(replica_config.backend_def,
                                                 inspect.isfunction):
            python_methods.append(method_name)

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(replica_config.backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)

        goal_id = self.backend_state.deploy_backend(name, backend_info)
        self.endpoint_state.create_endpoint(
            name,
            http_route,
            http_methods,
            TrafficPolicy({name: 1.0}),
            python_methods=python_methods)
        return goal_id

def create_backend(backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Dict[str, Any]] = None) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag assigned to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (optional): configuration options for this backend.
            Supported options:
            - "num_replicas": number of worker processes to start up that
              will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries that
              will be sent to a replica of this backend without receiving
              a response.
    """
    if backend_tag in list_backends():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    backend_config = BackendConfig(config, replica_config.accepts_batches,
                                   replica_config.is_blocking)

    ray.get(
        controller.create_backend.remote(backend_tag, backend_config,
                                         replica_config))

def test_replica_config_lazy_deserialization(self):
    def f():
        return "Check this out!"

    f_serialized = cloudpickle.dumps(f)
    config = ReplicaConfig(
        "f", f_serialized, cloudpickle.dumps(()), cloudpickle.dumps({}), {}
    )

    assert config.serialized_deployment_def == f_serialized
    assert config._deployment_def is None
    assert config.serialized_init_args == cloudpickle.dumps(tuple())
    assert config._init_args is None
    assert config.serialized_init_kwargs == cloudpickle.dumps(dict())
    assert config._init_kwargs is None
    assert isinstance(config.ray_actor_options, dict)
    assert isinstance(config.resource_dict, dict)

    assert config.deployment_def() == "Check this out!"
    assert config.init_args == tuple()
    assert config.init_kwargs == dict()

def test_create_backend_idempotent(serve_instance):
    def f(_):
        return "hello"

    controller = serve.api._global_client._controller

    replica_config = ReplicaConfig(f)
    backend_config = BackendConfig(num_replicas=1)

    for i in range(10):
        ray.get(
            controller.wait_for_goal.remote(
                controller.create_backend.remote("my_backend", backend_config,
                                                 replica_config)))

    assert len(ray.get(controller.get_all_backends.remote())) == 1
    serve.create_endpoint(
        "my_endpoint", backend="my_backend", route="/my_route")
    assert requests.get("http://127.0.0.1:8000/my_route").text == "hello"

def test_create_backend_idempotent(serve_instance):
    serve.init()

    def f():
        return "hello"

    controller = serve.api._get_controller()

    replica_config = ReplicaConfig(f)
    backend_config = BackendConfig({"num_replicas": 1})

    for i in range(10):
        ray.get(
            controller.create_backend.remote("my_backend", backend_config,
                                             replica_config))

    assert len(ray.get(controller.get_all_backends.remote())) == 1
    serve.create_endpoint(
        "my_endpoint", backend="my_backend", route="/my_route")
    assert requests.get("http://127.0.0.1:8000/my_route").text == "hello"

def deploy(self,
           name: str,
           deployment_def: Union[Callable, Type[Callable], str],
           init_args: Tuple[Any],
           init_kwargs: Dict[Any, Any],
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           prev_version: Optional[str] = None,
           route_prefix: Optional[str] = None,
           url: Optional[str] = None,
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    curr_job_env = ray.get_runtime_context().runtime_env
    if "runtime_env" in ray_actor_options:
        ray_actor_options["runtime_env"].setdefault(
            "working_dir", curr_job_env.get("working_dir"))
    else:
        ray_actor_options["runtime_env"] = curr_job_env

    replica_config = ReplicaConfig(
        deployment_def,
        init_args=init_args,
        init_kwargs=init_kwargs,
        ray_actor_options=ray_actor_options)

    if isinstance(config, dict):
        deployment_config = DeploymentConfig.parse_obj(config)
    elif isinstance(config, DeploymentConfig):
        deployment_config = config
    else:
        raise TypeError("config must be a DeploymentConfig or a dictionary.")

    if (deployment_config.autoscaling_config is not None
            and deployment_config.max_concurrent_queries <
            deployment_config.autoscaling_config.
            target_num_ongoing_requests_per_replica):
        logger.warning("Autoscaling will never happen, "
                       "because 'max_concurrent_queries' is less than "
                       "'target_num_ongoing_requests_per_replica' now.")

    goal_id, updating = ray.get(
        self._controller.deploy.remote(name,
                                       deployment_config.to_proto_bytes(),
                                       replica_config, version, prev_version,
                                       route_prefix,
                                       ray.get_runtime_context().job_id))

    tag = f"component=serve deployment={name}"

    if updating:
        msg = f"Updating deployment '{name}'"
        if version is not None:
            msg += f" to version '{version}'"
        logger.info(f"{msg}. {tag}")
    else:
        logger.info(f"Deployment '{name}' is already at version "
                    f"'{version}', not updating. {tag}")

    if _blocking:
        self._wait_for_goal(goal_id)

        if url is not None:
            url_part = f" at `{url}`"
        else:
            url_part = ""
        logger.info(
            f"Deployment '{name}{':'+version if version else ''}' is ready"
            f"{url_part}. {tag}")
    else:
        return goal_id

def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config_proto_bytes: bytes,
        route_prefix: Optional[str],
        deployer_job_id: "ray._raylet.JobID",
) -> bool:
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)
    version = deployment_config.version
    prev_version = deployment_config.prev_version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, deployment_config.deployment_language)

    if prev_version is not None:
        existing_deployment_info = (
            self.deployment_state_manager.get_deployment(name))
        if (existing_deployment_info is None
                or not existing_deployment_info.version):
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if existing_deployment_info.version != prev_version:
            raise ValueError(
                f"prev_version '{prev_version}' "
                "does not match with the existing "
                f"version '{existing_deployment_info.version}'.")

    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas

        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
    else:
        autoscaling_policy = None

    deployment_info = DeploymentInfo(
        actor_name=name,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    updating = self.deployment_state_manager.deploy(name, deployment_info)

    if route_prefix is not None:
        endpoint_info = EndpointInfo(route=route_prefix)
        self.endpoint_state.update_endpoint(name, endpoint_info)
    else:
        self.endpoint_state.delete_endpoint(name)

    return updating

def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           prev_version: Optional[str] = None,
           route_prefix: Optional[str] = None,
           url: str = "",
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    curr_job_env = ray.get_runtime_context().runtime_env
    if "runtime_env" in ray_actor_options:
        ray_actor_options["runtime_env"].setdefault(
            "uris", curr_job_env.get("uris"))
    else:
        ray_actor_options["runtime_env"] = curr_job_env
        if "working_dir" in ray_actor_options["runtime_env"]:
            del ray_actor_options["runtime_env"]["working_dir"]

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj(config)
    elif isinstance(config, BackendConfig):
        backend_config = config
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    goal_id, updating = ray.get(
        self._controller.deploy.remote(name,
                                       backend_config.to_proto_bytes(),
                                       replica_config, version, prev_version,
                                       route_prefix,
                                       ray.get_runtime_context().job_id))

    tag = f"component=serve deployment={name}"

    if updating:
        msg = f"Updating deployment '{name}'"
        if version is not None:
            msg += f" to version '{version}'"
        logger.info(f"{msg}. {tag}")
    else:
        logger.info(f"Deployment '{name}' is already at version "
                    f"'{version}', not updating. {tag}")

    if _blocking:
        self._wait_for_goal(goal_id)
        logger.info(
            f"Deployment '{name}{':'+version if version else ''}' is ready"
            f" at `{url}`. {tag}")
    else:
        return goal_id

def test_replica_config_validation():
    class Class:
        pass

    def function(_):
        pass

    ReplicaConfig(Class)
    ReplicaConfig(function)
    with pytest.raises(TypeError):
        ReplicaConfig(Class())

    # Check ray_actor_options validation.
    ReplicaConfig(
        Class,
        ray_actor_options={
            "num_cpus": 1.0,
            "num_gpus": 10,
            "resources": {
                "abc": 1.0
            },
            "memory": 1000000.0,
            "object_store_memory": 1000000,
        })
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options=1.0)
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options=False)
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"num_cpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"num_cpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"num_gpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"num_gpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"memory": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(
            Class, ray_actor_options={"object_store_memory": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"object_store_memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"resources": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"name": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"lifetime": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"max_restarts": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"placement_group": None})

def create_backend(self,
                   backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Union[BackendConfig, Dict[
                       str, Any]]] = None,
                   env: Optional[CondaEnv] = None) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag assigned to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that will
              handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries that
              will be sent to a replica of this backend without receiving
              a response.
            - "user_config" (experimental): Arguments to pass to the
              reconfigure method of the backend. The reconfigure method is
              called if "user_config" is not None.
        env (serve.CondaEnv, optional): conda environment to run this
            backend in. Requires the caller to be running in an activated
            conda environment (not necessarily ``env``), and requires
            ``env`` to be an existing conda environment on all nodes. If
            ``env`` is not provided but conda is activated, the backend
            will run in the conda environment of the caller.
    """
    if backend_tag in self.list_backends().keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    if env is None:
        # If conda is activated, default to conda env of this process.
        if os.environ.get("CONDA_PREFIX"):
            if "override_environment_variables" not in ray_actor_options:
                ray_actor_options["override_environment_variables"] = {}
            ray_actor_options["override_environment_variables"].update(
                {"PYTHONHOME": os.environ.get("CONDA_PREFIX")})
    else:
        conda_env_dir = get_conda_env_dir(env.name)
        ray_actor_options.update(
            override_environment_variables={"PYTHONHOME": conda_env_dir})

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config,
            "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    ray.get(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))

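# A brief sketch of the conda-env path documented above, assuming CondaEnv
# takes the environment name; `client`, `ModelClass`, and "my_env" are
# hypothetical stand-ins, and "my_env" must already exist on all nodes.
client.create_backend(
    "model_backend",
    ModelClass,               # hypothetical class implementing __call__
    env=CondaEnv("my_env"),   # replicas run inside this conda environment
)
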
def test_replica_config_validation(self):
    class Class:
        pass

    def function(_):
        pass

    ReplicaConfig.create(Class)
    ReplicaConfig.create(function)
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class())

    # Check ray_actor_options validation.
    ReplicaConfig.create(
        Class,
        tuple(),
        dict(),
        ray_actor_options={
            "num_cpus": 1.0,
            "num_gpus": 10,
            "resources": {"abc": 1.0},
            "memory": 1000000.0,
            "object_store_memory": 1000000,
        },
    )
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options=1.0)
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options=False)
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options={"num_cpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig.create(Class, ray_actor_options={"num_cpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options={"num_gpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig.create(Class, ray_actor_options={"num_gpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options={"memory": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig.create(Class, ray_actor_options={"memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig.create(
            Class, ray_actor_options={"object_store_memory": "hello"}
        )
    with pytest.raises(ValueError):
        ReplicaConfig.create(Class, ray_actor_options={"object_store_memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig.create(Class, ray_actor_options={"resources": []})

    disallowed_ray_actor_options = {
        "max_concurrency",
        "max_restarts",
        "max_task_retries",
        "name",
        "namespace",
        "lifetime",
        "placement_group",
        "placement_group_bundle_index",
        "placement_group_capture_child_tasks",
        "max_pending_calls",
        "scheduling_strategy",
        "get_if_exists",
        "_metadata",
    }

    for option in disallowed_ray_actor_options:
        with pytest.raises(ValueError):
            ReplicaConfig.create(Class, ray_actor_options={option: None})

def get_deploy_args(
    self,
    name: str,
    deployment_def: Union[Callable, Type[Callable], str],
    init_args: Tuple[Any],
    init_kwargs: Dict[Any, Any],
    ray_actor_options: Optional[Dict] = None,
    config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
    version: Optional[str] = None,
    route_prefix: Optional[str] = None,
) -> Dict:
    """
    Takes a deployment's configuration, and returns the arguments needed
    for the controller to deploy it.
    """
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    curr_job_env = ray.get_runtime_context().runtime_env
    if "runtime_env" in ray_actor_options:
        # It is illegal to set field working_dir to None.
        if curr_job_env.get("working_dir") is not None:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", curr_job_env.get("working_dir"))
    else:
        ray_actor_options["runtime_env"] = curr_job_env

    replica_config = ReplicaConfig.create(
        deployment_def,
        init_args=init_args,
        init_kwargs=init_kwargs,
        ray_actor_options=ray_actor_options,
    )

    if isinstance(config, dict):
        deployment_config = DeploymentConfig.parse_obj(config)
    elif isinstance(config, DeploymentConfig):
        deployment_config = config
    else:
        raise TypeError("config must be a DeploymentConfig or a dictionary.")

    deployment_config.version = version

    if (deployment_config.autoscaling_config is not None
            and deployment_config.max_concurrent_queries <
            deployment_config.autoscaling_config.target_num_ongoing_requests_per_replica  # noqa: E501
        ):
        logger.warning("Autoscaling will never happen, "
                       "because 'max_concurrent_queries' is less than "
                       "'target_num_ongoing_requests_per_replica' now.")

    controller_deploy_args = {
        "name": name,
        "deployment_config_proto_bytes": deployment_config.to_proto_bytes(),
        "replica_config_proto_bytes": replica_config.to_proto_bytes(),
        "route_prefix": route_prefix,
        "deployer_job_id": ray.get_runtime_context().job_id,
    }

    return controller_deploy_args

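# A hedged sketch of how the dictionary returned by get_deploy_args could be
# forwarded to the controller; `client`, `echo`, and the exact call pattern
# are illustrative assumptions, not code from the original source.
deploy_args = client.get_deploy_args(
    name="echo",
    deployment_def=echo,  # a callable, class, or import-path string
    init_args=tuple(),
    init_kwargs=dict(),
    ray_actor_options={"num_cpus": 1},
)
# The returned keys match the controller deploy() signatures shown earlier:
# name, deployment_config_proto_bytes, replica_config_proto_bytes,
# route_prefix, and deployer_job_id.
updating = ray.get(client._controller.deploy.remote(**deploy_args))
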
def test_replica_config_validation():
    class Class:
        pass

    class BatchClass:
        @serve.accept_batch
        def __call__(self):
            pass

    def function(_):
        pass

    @serve.accept_batch
    def batch_function(_):
        pass

    ReplicaConfig(Class)
    ReplicaConfig(function)
    with pytest.raises(TypeError):
        ReplicaConfig(Class())

    # Check max_batch_size validation.
    assert not ReplicaConfig(function).accepts_batches
    assert not ReplicaConfig(Class).accepts_batches
    assert ReplicaConfig(batch_function).accepts_batches
    assert ReplicaConfig(BatchClass).accepts_batches

    # Check ray_actor_options validation.
    ReplicaConfig(
        Class,
        ray_actor_options={
            "num_cpus": 1.0,
            "num_gpus": 10,
            "resources": {
                "abc": 1.0
            },
            "memory": 1000000.0,
            "object_store_memory": 1000000,
        })
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options=1.0)
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options=False)
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"num_cpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"num_cpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"num_gpus": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"num_gpus": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"memory": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(
            Class, ray_actor_options={"object_store_memory": "hello"})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"object_store_memory": -1})
    with pytest.raises(TypeError):
        ReplicaConfig(Class, ray_actor_options={"resources": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"name": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"lifetime": None})
    with pytest.raises(ValueError):
        ReplicaConfig(Class, ray_actor_options={"max_restarts": None})

def create_backend(
        self,
        backend_tag: str,
        backend_def: Union[Callable, Type[Callable], str],
        *init_args: Any,
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
) -> None:
    """Create a backend with the provided tag.

    Args:
        backend_tag (str): a unique tag assigned to identify this backend.
        backend_def (callable, class, str): a function or class
            implementing __call__ and returning a JSON-serializable object
            or a Starlette Response object. A string import path can also
            be provided (e.g., "my_module.MyClass"), in which case the
            underlying function or class will be imported dynamically in
            the worker replicas.
        *init_args (optional): the arguments to pass to the class
            initialization method. Not valid if backend_def is a function.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that will
              handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries that
              will be sent to a replica of this backend without receiving
              a response.
            - "user_config" (experimental): Arguments to pass to the
              reconfigure method of the backend. The reconfigure method is
              called if "user_config" is not None.
    """
    if backend_tag in self.list_backends(_internal=True).keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster).
    if not ray.util.client.ray.is_connected():
        if ray_actor_options.get("runtime_env") is None:
            ray_actor_options["runtime_env"] = {}
        if ray_actor_options["runtime_env"].get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env is not None and current_env != "":
                ray_actor_options["runtime_env"]["conda"] = current_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config,
            "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    self._wait_for_goal(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))

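# A small sketch of the string import path variant documented above: the
# module path "my_module.MyClass" comes from the docstring and is a
# placeholder; it must be importable on every node for the dynamic import in
# the worker replicas to succeed. `client` is a hypothetical handle to an
# object exposing the create_backend method above.
client.create_backend(
    "my_backend",
    "my_module.MyClass",  # imported lazily inside the worker replicas
    config={"num_replicas": 2, "max_concurrent_queries": 8},
)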