def test_backend_config_validation():
    # Test unknown key.
    with pytest.raises(ValidationError):
        BackendConfig(unknown_key=-1)

    # Test num_replicas validation.
    BackendConfig(num_replicas=1)
    with pytest.raises(ValidationError, match="type_error"):
        BackendConfig(num_replicas="hello")
    with pytest.raises(ValidationError, match="value_error"):
        BackendConfig(num_replicas=-1)

    # Test max_batch_size validation.
    BackendConfig(
        max_batch_size=10,
        internal_metadata=BackendMetadata(
            accepts_batches=True))._validate_complete()
    with pytest.raises(ValueError):
        BackendConfig(
            max_batch_size=10,
            internal_metadata=BackendMetadata(
                accepts_batches=False))._validate_complete()
    with pytest.raises(ValidationError, match="type_error"):
        BackendConfig(max_batch_size="hello")
    with pytest.raises(ValidationError, match="value_error"):
        BackendConfig(max_batch_size=0)
    with pytest.raises(ValidationError, match="value_error"):
        BackendConfig(max_batch_size=-1)

    # Test dynamic default for max_concurrent_queries.
    assert BackendConfig().max_concurrent_queries == 8
    assert BackendConfig(max_batch_size=7).max_concurrent_queries == 14
    assert BackendConfig(
        max_batch_size=10,
        internal_metadata=BackendMetadata(
            is_blocking=False)).max_concurrent_queries == 100
    assert BackendConfig(
        max_batch_size=7,
        batch_wait_timeout=1.0).max_concurrent_queries == 14
async def test_servable_batch_error(serve_instance, router,
                                    mock_controller_with_name):
    @serve.accept_batch
    class ErrorBatcher:
        def error_different_size(self, requests):
            return [""] * (len(requests) + 10)

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, requests):
            return np.array([1] * len(requests)).astype(np.int32)

    backend_config = BackendConfig(
        max_batch_size=4,
        internal_metadata=BackendMetadata(accepts_batches=True))
    await add_servable_to_router(
        ErrorBatcher,
        router,
        mock_controller_with_name[0],
        backend_config=backend_config)

    with pytest.raises(
            RayServeException, match="doesn't preserve batch size"):
        different_size = make_request_param("error_different_size")
        await (await router.assign_request.remote(different_size))

    with pytest.raises(RayServeException, match="iterable"):
        non_iterable = make_request_param("error_non_iterable")
        await (await router.assign_request.remote(non_iterable))

    np_array = make_request_param("return_np_array")
    result_np_value = await (await router.assign_request.remote(np_array))
    assert isinstance(result_np_value, np.int32)
async def test_task_runner_custom_method_batch(serve_instance, router,
                                               mock_controller_with_name):
    @serve.accept_batch
    class Batcher:
        def a(self, requests):
            return ["a-{}".format(i) for i in range(len(requests))]

        def b(self, requests):
            return ["b-{}".format(i) for i in range(len(requests))]

    backend_config = BackendConfig(
        max_batch_size=4,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))
    await add_servable_to_router(
        Batcher,
        router,
        mock_controller_with_name[0],
        backend_config=backend_config)

    a_query_param = make_request_param("a")
    b_query_param = make_request_param("b")

    futures = [
        await router.assign_request.remote(a_query_param) for _ in range(2)
    ]
    futures += [
        await router.assign_request.remote(b_query_param) for _ in range(2)
    ]

    gathered = await asyncio.gather(*futures)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}
async def test_task_runner_perform_batch(serve_instance):
    q = ray.remote(Router).remote()
    await q.setup.remote("")

    def batcher(*args, **kwargs):
        return [serve.context.batch_size] * serve.context.batch_size

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))

    worker = setup_worker(CONSUMER_NAME, batcher, backend_config=config)
    await q.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)
    await q.set_backend_config.remote(CONSUMER_NAME, config)
    await q.set_traffic.remote(PRODUCER_NAME,
                               TrafficPolicy({CONSUMER_NAME: 1.0}))

    query_param = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)

    my_batch_sizes = await asyncio.gather(
        *[q.enqueue_request.remote(query_param) for _ in range(3)])
    assert my_batch_sizes == [2, 2, 1]
def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to the conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster).
    if not ray.util.client.ray.is_connected():
        if ray_actor_options.get("runtime_env") is None:
            ray_actor_options["runtime_env"] = {}
        if ray_actor_options["runtime_env"].get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env is not None and current_env != "":
                ray_actor_options["runtime_env"]["conda"] = current_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking,
        is_asgi_app=replica_config.is_asgi_app,
        path_prefix=replica_config.path_prefix,
    )

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    goal_ref = self._controller.deploy.remote(name, backend_config,
                                              replica_config, version)

    if _blocking:
        self._wait_for_goal(goal_ref)
    else:
        return goal_ref
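# Hedged usage sketch (an assumption, not from the source): shows how the
# deploy() method above might be invoked from a Serve client handle. The
# names `client`, `example_deploy_usage`, and `Greeter` are hypothetical and
# introduced only for illustration; the BackendConfig fields match the
# options exercised by the validation tests above.
def example_deploy_usage(client):
    class Greeter:
        def __call__(self, request):
            return "hello"

    # A BackendConfig (or an equivalent dict) carries the replica settings;
    # internal_metadata is filled in by deploy() itself from the ReplicaConfig.
    config = BackendConfig(num_replicas=2, max_concurrent_queries=16)
    client.deploy("greeter", Greeter, config=config, version="v1")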
def test_backend_config_update():
    b = BackendConfig(num_replicas=1, max_batch_size=1)

    # Test that updating a key works.
    b.num_replicas = 2
    assert b.num_replicas == 2
    # Check that not specifying a key doesn't update it.
    assert b.max_batch_size == 1

    # Check that input is validated.
    with pytest.raises(ValidationError):
        b.num_replicas = "Hello"
    with pytest.raises(ValidationError):
        b.num_replicas = -1

    # Test batch validation.
    b = BackendConfig(
        internal_metadata=BackendMetadata(accepts_batches=False))
    b.max_batch_size = 1
    with pytest.raises(ValueError):
        b.max_batch_size = 2
        b._validate_complete()

    b = BackendConfig(internal_metadata=BackendMetadata(accepts_batches=True))
    b.max_batch_size = 2
async def test_task_runner_perform_batch(serve_instance, router):
    def batcher(requests):
        batch_size = len(requests)
        return [batch_size] * batch_size

    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))

    _ = await add_servable_to_router(batcher, router, backend_config=config)

    query_param = make_request_param()
    my_batch_sizes = await asyncio.gather(
        *[router.enqueue_request.remote(query_param) for _ in range(3)])
    assert my_batch_sizes == [2, 2, 1]
async def test_task_runner_perform_batch(serve_instance,
                                         mock_controller_with_name):
    def batcher(requests):
        batch_size = len(requests)
        return [batch_size] * batch_size

    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))

    worker, router = await add_servable_to_router(
        batcher, *mock_controller_with_name, backend_config=config)

    query_param = make_request_param()
    my_batch_sizes = await asyncio.gather(
        *[(await router.assign_request(query_param)) for _ in range(3)])
    assert my_batch_sizes == [2, 2, 1]
async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    class KeepInflight:
        def __init__(self):
            self.events = []

        def reconfigure(self, config):
            if config["release"]:
                [event.set() for event in self.events]

        async def __call__(self, _):
            e = asyncio.Event()
            self.events.append(e)
            await e.wait()

    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=BackendConfig(
            num_replicas=1,
            internal_metadata=BackendMetadata(is_blocking=False),
            user_config={"release": False}))

    query_param = make_request_param()

    refs = [(await router.assign_request.remote(query_param))
            for _ in range(6)]

    shutdown_ref = backend_worker.drain_pending_queries.remote()

    with pytest.raises(ray.exceptions.GetTimeoutError):
        # Shutdown should block because there are still inflight queries.
        ray.get(shutdown_ref, timeout=2)

    config = BackendConfig()
    config.user_config = {"release": True}
    await mock_controller_with_name[1].update_backend.remote(
        "backend", config)

    # All queries should complete successfully.
    ray.get(refs)
    # The draining operation should be completed.
    ray.get(shutdown_ref)
async def test_task_runner_perform_async(serve_instance):
    q = ray.remote(Router).remote()
    await q.setup.remote("")

    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters == self.release_on:
                self.event.set()
            else:
                await self.event.wait()

    barrier = Barrier.remote(release_on=10)

    async def wait_and_go(*args, **kwargs):
        await barrier.wait.remote()
        return "done!"

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    config = BackendConfig(
        max_concurrent_queries=10,
        internal_metadata=BackendMetadata(is_blocking=False))

    worker = setup_worker(CONSUMER_NAME, wait_and_go, backend_config=config)
    await q.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)
    await q.set_backend_config.remote(CONSUMER_NAME, config)
    q.set_traffic.remote(PRODUCER_NAME, TrafficPolicy({CONSUMER_NAME: 1.0}))

    query_param = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)

    done, not_done = await asyncio.wait(
        [q.enqueue_request.remote(query_param) for _ in range(10)], timeout=10)
    assert len(done) == 10
    for item in done:
        # Assert on the result; the bare comparison would otherwise be
        # silently discarded.
        assert await item == "done!"
async def test_task_runner_perform_async(serve_instance, router,
                                         mock_controller_with_name):
    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters == self.release_on:
                self.event.set()
            else:
                await self.event.wait()

    barrier = Barrier.remote(release_on=10)

    async def wait_and_go(*args, **kwargs):
        await barrier.wait.remote()
        return "done!"

    config = BackendConfig(
        max_concurrent_queries=10,
        internal_metadata=BackendMetadata(is_blocking=False))
    await add_servable_to_router(
        wait_and_go,
        router,
        mock_controller_with_name[0],
        backend_config=config)

    query_param = make_request_param()

    done, not_done = await asyncio.wait(
        [(await router.assign_request.remote(query_param))
         for _ in range(10)],
        timeout=10)
    assert len(done) == 10
    for item in done:
        assert await item == "done!"
def create_backend(self,
                   backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Union[BackendConfig, Dict[
                       str, Any]]] = None,
                   env: Optional[CondaEnv] = None) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag used to identify this backend.
        func_or_class (callable, class): a function or a class
            implementing __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that
              will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries
              that will be sent to a replica of this backend
              without receiving a response.
            - "user_config" (experimental): Arguments to pass to the
              reconfigure method of the backend. The reconfigure method is
              called if "user_config" is not None.
        env (serve.CondaEnv, optional): conda environment to run this
            backend in. Requires the caller to be running in an activated
            conda environment (not necessarily ``env``), and requires
            ``env`` to be an existing conda environment on all nodes. If
            ``env`` is not provided but conda is activated, the backend
            will run in the conda environment of the caller.
    """
    if backend_tag in self.list_backends().keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}
    if env is None:
        # If conda is activated, default to the conda env of this process.
        if os.environ.get("CONDA_PREFIX"):
            if "override_environment_variables" not in ray_actor_options:
                ray_actor_options["override_environment_variables"] = {}
            ray_actor_options["override_environment_variables"].update(
                {"PYTHONHOME": os.environ.get("CONDA_PREFIX")})
    else:
        conda_env_dir = get_conda_env_dir(env.name)
        ray_actor_options.update(
            override_environment_variables={"PYTHONHOME": conda_env_dir})
    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    ray.get(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))
def create_backend(
        self,
        backend_tag: str,
        func_or_class: Union[Callable, Type[Callable]],
        *actor_init_args: Any,
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag used to identify this backend.
        func_or_class (callable, class): a function or a class
            implementing __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of worker processes to start up that
              will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries that
              will be sent to a replica of this backend without receiving
              a response.
    """
    if backend_tag in self.list_backends():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    ray.get(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))
def create_backend(
        self,
        backend_tag: str,
        backend_def: Union[Callable, Type[Callable], str],
        *init_args: Any,
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
) -> None:
    """Create a backend with the provided tag.

    Args:
        backend_tag (str): a unique tag used to identify this backend.
        backend_def (callable, class, str): a function or class
            implementing __call__ and returning a JSON-serializable object
            or a Starlette Response object. A string import path can also
            be provided (e.g., "my_module.MyClass"), in which case the
            underlying function or class will be imported dynamically in
            the worker replicas.
        *init_args (optional): the arguments to pass to the class
            initialization method. Not valid if backend_def is a function.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (dict, serve.BackendConfig, optional): configuration options
            for this backend. Either a BackendConfig, or a dictionary
            mapping strings to values for the following supported options:
            - "num_replicas": number of processes to start up that
              will handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
              be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              will wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": the maximum number of queries
              that will be sent to a replica of this backend
              without receiving a response.
            - "user_config" (experimental): Arguments to pass to the
              reconfigure method of the backend. The reconfigure method is
              called if "user_config" is not None.
    """
    if backend_tag in self.list_backends(_internal=True).keys():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to the conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster).
    if not ray.util.client.ray.is_connected():
        if ray_actor_options.get("runtime_env") is None:
            ray_actor_options["runtime_env"] = {}
        if ray_actor_options["runtime_env"].get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env is not None and current_env != "":
                ray_actor_options["runtime_env"]["conda"] = current_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking)

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    self._wait_for_goal(
        self._controller.create_backend.remote(backend_tag, backend_config,
                                               replica_config))
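# Hedged usage sketch (an assumption, not from the source): shows how the
# create_backend() method above might be called with a plain dict config,
# which the method parses into a BackendConfig before validation. The names
# `client`, `example_create_backend_usage`, and `echo` are hypothetical and
# introduced only for illustration.
def example_create_backend_usage(client):
    def echo(request):
        return "echo"

    client.create_backend(
        "echo_backend",
        echo,
        config={
            "num_replicas": 1,
            "max_concurrent_queries": 8,
        })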
async def test_task_runner_custom_method_batch(serve_instance):
    q = ray.remote(Router).remote()
    await q.setup.remote("")

    @serve.accept_batch
    class Batcher:
        def a(self, _):
            return ["a-{}".format(i) for i in range(serve.context.batch_size)]

        def b(self, _):
            return ["b-{}".format(i) for i in range(serve.context.batch_size)]

        def error_different_size(self, _):
            return [""] * (serve.context.batch_size * 2)

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, _):
            return np.array([1] * serve.context.batch_size).astype(np.int32)

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    backend_config = BackendConfig(
        max_batch_size=4,
        batch_wait_timeout=2,
        internal_metadata=BackendMetadata(accepts_batches=True))
    worker = setup_worker(
        CONSUMER_NAME, Batcher, backend_config=backend_config)

    await q.set_traffic.remote(PRODUCER_NAME,
                               TrafficPolicy({CONSUMER_NAME: 1.0}))
    await q.set_backend_config.remote(CONSUMER_NAME, backend_config)

    def make_request_param(call_method):
        return RequestMetadata(
            PRODUCER_NAME,
            context.TaskContext.Python,
            call_method=call_method)

    a_query_param = make_request_param("a")
    b_query_param = make_request_param("b")

    futures = [q.enqueue_request.remote(a_query_param) for _ in range(2)]
    futures += [q.enqueue_request.remote(b_query_param) for _ in range(2)]

    await q.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)

    gathered = await asyncio.gather(*futures)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}

    with pytest.raises(
            RayServeException, match="doesn't preserve batch size"):
        different_size = make_request_param("error_different_size")
        await q.enqueue_request.remote(different_size)

    with pytest.raises(RayServeException, match="iterable"):
        non_iterable = make_request_param("error_non_iterable")
        await q.enqueue_request.remote(non_iterable)

    np_array = make_request_param("return_np_array")
    result_np_value = await q.enqueue_request.remote(np_array)
    assert isinstance(result_np_value, np.int32)