Beispiel #1
0
def test_backend_config_validation():
    """Check BackendConfig field validation and its derived defaults."""
    # A keyword that is not a BackendConfig field is rejected.
    with pytest.raises(ValidationError):
        BackendConfig(unknown_key=-1)

    # num_replicas: 1 is accepted; a string and a negative value are not.
    BackendConfig(num_replicas=1)
    rejected_replicas = (("hello", "type_error"), (-1, "value_error"))
    for bad_value, error_kind in rejected_replicas:
        with pytest.raises(ValidationError, match=error_kind):
            BackendConfig(num_replicas=bad_value)

    # max_batch_size: complete validation passes when the metadata accepts
    # batches and raises when it does not.
    batching_config = BackendConfig(
        max_batch_size=10,
        internal_metadata=BackendMetadata(accepts_batches=True))
    batching_config._validate_complete()
    with pytest.raises(ValueError):
        # Construction stays inside the context so that either step may be
        # the one that raises.
        non_batching_config = BackendConfig(
            max_batch_size=10,
            internal_metadata=BackendMetadata(accepts_batches=False))
        non_batching_config._validate_complete()
    rejected_batch_sizes = (
        ("hello", "type_error"),
        (0, "value_error"),
        (-1, "value_error"),
    )
    for bad_value, error_kind in rejected_batch_sizes:
        with pytest.raises(ValidationError, match=error_kind):
            BackendConfig(max_batch_size=bad_value)

    # max_concurrent_queries is filled in when it is not given explicitly.
    plain_config = BackendConfig()
    assert plain_config.max_concurrent_queries == 8

    batched_config = BackendConfig(max_batch_size=7)
    assert batched_config.max_concurrent_queries == 14

    non_blocking_config = BackendConfig(
        max_batch_size=10,
        internal_metadata=BackendMetadata(is_blocking=False))
    assert non_blocking_config.max_concurrent_queries == 100

    timed_config = BackendConfig(max_batch_size=7, batch_wait_timeout=1.0)
    assert timed_config.max_concurrent_queries == 14
Beispiel #2
0
async def test_servable_batch_error(serve_instance, router,
                                    mock_controller_with_name):
    """Check how malformed batch results from a servable are reported.

    A result of the wrong length and a non-iterable result must both raise
    RayServeException; a numpy array result must come back as np.int32
    items.
    """

    @serve.accept_batch
    class ErrorBatcher:
        def error_different_size(self, requests):
            # Ten more results than requests.
            oversized_length = len(requests) + 10
            return [""] * oversized_length

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, requests):
            ones = np.array([1] * len(requests))
            return ones.astype(np.int32)

    batching_metadata = BackendMetadata(accepts_batches=True)
    backend_config = BackendConfig(
        max_batch_size=4, internal_metadata=batching_metadata)
    await add_servable_to_router(ErrorBatcher,
                                 router,
                                 mock_controller_with_name[0],
                                 backend_config=backend_config)

    # Each failing method is paired with the message fragment it must produce.
    failing_cases = (
        ("error_different_size", "doesn't preserve batch size"),
        ("error_non_iterable", "iterable"),
    )
    for method_name, expected_message in failing_cases:
        with pytest.raises(RayServeException, match=expected_message):
            failing_request = make_request_param(method_name)
            result_ref = await router.assign_request.remote(failing_request)
            await result_ref

    np_request = make_request_param("return_np_array")
    np_result_ref = await router.assign_request.remote(np_request)
    result_np_value = await np_result_ref
    assert isinstance(result_np_value, np.int32)
Beispiel #3
0
async def test_task_runner_custom_method_batch(serve_instance, router,
                                               mock_controller_with_name):
    """Check that requests for different methods are batched per method.

    Two requests go to method "a" and two to method "b"; the results must be
    "a-0", "a-1", "b-0" and "b-1".
    """

    @serve.accept_batch
    class Batcher:
        def a(self, requests):
            return list(map("a-{}".format, range(len(requests))))

        def b(self, requests):
            return list(map("b-{}".format, range(len(requests))))

    batching_metadata = BackendMetadata(accepts_batches=True)
    backend_config = BackendConfig(
        max_batch_size=4,
        batch_wait_timeout=10,
        internal_metadata=batching_metadata)
    await add_servable_to_router(Batcher,
                                 router,
                                 mock_controller_with_name[0],
                                 backend_config=backend_config)

    a_query_param = make_request_param("a")
    b_query_param = make_request_param("b")

    # Submit in the order a, a, b, b, awaiting each assignment before
    # submitting the next.
    submission_order = (
        a_query_param,
        a_query_param,
        b_query_param,
        b_query_param,
    )
    pending_results = []
    for query_param in submission_order:
        result_ref = await router.assign_request.remote(query_param)
        pending_results.append(result_ref)

    gathered = await asyncio.gather(*pending_results)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}
Beispiel #4
0
async def test_task_runner_perform_batch(serve_instance):
    """Check that three requests with max_batch_size=2 yield sizes 2, 2, 1."""
    router_actor = ray.remote(Router).remote()
    await router_actor.setup.remote("")

    def batcher(*args, **kwargs):
        # Every request in a batch is answered with the size of that batch.
        current_size = serve.context.batch_size
        return [current_size] * current_size

    consumer = "runner"
    producer = "producer"

    batching_metadata = BackendMetadata(accepts_batches=True)
    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=batching_metadata)

    worker = setup_worker(consumer, batcher, backend_config=config)
    await router_actor.add_new_worker.remote(consumer, "replica1", worker)
    await router_actor.set_backend_config.remote(consumer, config)
    traffic_policy = TrafficPolicy({consumer: 1.0})
    await router_actor.set_traffic.remote(producer, traffic_policy)

    query_param = RequestMetadata(producer, context.TaskContext.Python)

    pending_results = []
    for _ in range(3):
        pending_results.append(
            router_actor.enqueue_request.remote(query_param))
    my_batch_sizes = await asyncio.gather(*pending_results)
    assert my_batch_sizes == [2, 2, 1]
Beispiel #5
0
    def deploy(self,
               name: str,
               backend_def: Union[Callable, Type[Callable], str],
               *init_args: Any,
               ray_actor_options: Optional[Dict] = None,
               config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
               version: Optional[str] = None,
               _blocking: Optional[bool] = True) -> Optional[GoalId]:
        """Deploy ``backend_def`` under ``name`` through the controller.

        Args:
            name: identifier passed to the controller's ``deploy`` call.
            backend_def: function, class or import-path string handed to
                ``ReplicaConfig``.
            *init_args: positional arguments forwarded to ``ReplicaConfig``.
            ray_actor_options: options forwarded to ``ReplicaConfig``. The
                caller's dictionary is never modified; a copy is used.
            config: a ``BackendConfig`` or a dictionary of its fields. Its
                ``internal_metadata`` is always replaced by metadata derived
                from the replica config.
            version: forwarded unchanged to the controller.
            _blocking: when truthy, wait for the deployment goal before
                returning.

        Returns:
            ``None`` when ``_blocking`` is truthy; otherwise the reference
            returned by the controller's ``deploy`` call.

        Raises:
            TypeError: if ``config`` is neither a ``BackendConfig`` nor a
                dictionary.
        """
        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}
        elif isinstance(ray_actor_options, dict):
            # Copy so that the conda default injected below does not leak
            # into a dictionary the caller may reuse for other deployments.
            ray_actor_options = dict(ray_actor_options)

        # If conda is activated and a conda env is not specified in runtime_env
        # in ray_actor_options, default to conda env of this process (client).
        # Without this code, the backend would run in the controller's conda
        # env, which is likely different from that of the client.
        # If using Ray client, skip this convenience feature because the local
        # client env doesn't create the Ray cluster (so the client env is
        # likely not present on the cluster.)
        if not ray.util.client.ray.is_connected():
            runtime_env = ray_actor_options.get("runtime_env")
            if runtime_env is None:
                runtime_env = {}
            elif isinstance(runtime_env, dict):
                # The nested mapping is copied for the same reason.
                runtime_env = dict(runtime_env)
            ray_actor_options["runtime_env"] = runtime_env
            if runtime_env.get("conda") is None:
                current_env = os.environ.get("CONDA_DEFAULT_ENV")
                if current_env:
                    runtime_env["conda"] = current_env

        replica_config = ReplicaConfig(backend_def,
                                       *init_args,
                                       ray_actor_options=ray_actor_options)
        metadata = BackendMetadata(
            accepts_batches=replica_config.accepts_batches,
            is_blocking=replica_config.is_blocking,
            is_asgi_app=replica_config.is_asgi_app,
            path_prefix=replica_config.path_prefix,
        )

        if isinstance(config, dict):
            backend_config = BackendConfig.parse_obj({
                **config, "internal_metadata":
                metadata
            })
        elif isinstance(config, BackendConfig):
            backend_config = config.copy(
                update={"internal_metadata": metadata})
        else:
            raise TypeError("config must be a BackendConfig or a dictionary.")

        backend_config._validate_complete()
        goal_ref = self._controller.deploy.remote(name, backend_config,
                                                  replica_config, version)

        if _blocking:
            self._wait_for_goal(goal_ref)
            return None
        return goal_ref
Beispiel #6
0
def test_backend_config_update():
    """Check that assigning to BackendConfig fields is validated."""
    config = BackendConfig(num_replicas=1, max_batch_size=1)

    # Assigning one field changes it and leaves the other field alone.
    config.num_replicas = 2
    assert config.num_replicas == 2
    assert config.max_batch_size == 1

    # Invalid assignments are rejected.
    for invalid_replicas in ("Hello", -1):
        with pytest.raises(ValidationError):
            config.num_replicas = invalid_replicas

    # Without batch support a batch size of 1 is fine, a larger one is not.
    non_batching_metadata = BackendMetadata(accepts_batches=False)
    config = BackendConfig(internal_metadata=non_batching_metadata)
    config.max_batch_size = 1
    with pytest.raises(ValueError):
        # Both steps stay inside the context: either the assignment or the
        # explicit validation may be the one that raises.
        config.max_batch_size = 2
        config._validate_complete()

    # With batch support the same assignment is accepted.
    batching_metadata = BackendMetadata(accepts_batches=True)
    config = BackendConfig(internal_metadata=batching_metadata)
    config.max_batch_size = 2
Beispiel #7
0
async def test_task_runner_perform_batch(serve_instance, router):
    """Check that three requests with max_batch_size=2 yield sizes 2, 2, 1."""

    def batcher(requests):
        # Every request in a batch is answered with the size of that batch.
        return [len(requests)] * len(requests)

    batching_metadata = BackendMetadata(accepts_batches=True)
    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=batching_metadata)

    # The value returned by the helper is not needed here.
    await add_servable_to_router(batcher, router, backend_config=config)

    query_param = make_request_param()
    pending_results = []
    for _ in range(3):
        pending_results.append(router.enqueue_request.remote(query_param))
    my_batch_sizes = await asyncio.gather(*pending_results)
    assert my_batch_sizes == [2, 2, 1]
Beispiel #8
0
async def test_task_runner_perform_batch(serve_instance,
                                         mock_controller_with_name):
    """Check that three requests with max_batch_size=2 yield sizes 2, 2, 1."""

    def batcher(requests):
        # Every request in a batch is answered with the size of that batch.
        return [len(requests)] * len(requests)

    batching_metadata = BackendMetadata(accepts_batches=True)
    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=batching_metadata)

    # Only the router half of the returned pair is used below.
    _worker, router = await add_servable_to_router(
        batcher, *mock_controller_with_name, backend_config=config)

    query_param = make_request_param()
    pending_results = []
    for _ in range(3):
        # Each assignment is awaited before the next request is submitted.
        pending_results.append(await router.assign_request(query_param))
    my_batch_sizes = await asyncio.gather(*pending_results)
    assert my_batch_sizes == [2, 2, 1]
Beispiel #9
0
async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    """Check that draining a worker waits for in-flight queries.

    Six queries are parked on events inside the backend. Draining must time
    out while they are parked and finish once a reconfigure releases them.
    """

    class KeepInflight:
        def __init__(self):
            self.events = []

        def reconfigure(self, config):
            if not config["release"]:
                return
            # Wake up every call that is currently parked.
            for event in self.events:
                event.set()

        async def __call__(self, _):
            parked = asyncio.Event()
            self.events.append(parked)
            await parked.wait()

    holding_config = BackendConfig(
        num_replicas=1,
        internal_metadata=BackendMetadata(is_blocking=False),
        user_config={"release": False})
    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=holding_config)

    query_param = make_request_param()

    refs = []
    for _ in range(6):
        refs.append(await router.assign_request.remote(query_param))

    shutdown_ref = backend_worker.drain_pending_queries.remote()

    # Shutdown should block because there are still inflight queries.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(shutdown_ref, timeout=2)

    releasing_config = BackendConfig()
    releasing_config.user_config = {"release": True}
    # NOTE(review): the second fixture element is presumably the mock
    # controller handle -- confirm against the fixture definition.
    controller_handle = mock_controller_with_name[1]
    await controller_handle.update_backend.remote("backend", releasing_config)

    # All queries should complete successfully.
    ray.get(refs)
    # The draining operation should be completed.
    ray.get(shutdown_ref)
Beispiel #10
0
async def test_task_runner_perform_async(serve_instance):
    """Check that a non-blocking backend serves ten queries concurrently.

    Every query waits on a shared barrier that opens only once ten callers
    have arrived, so all ten can finish within the timeout only if they are
    in flight at the same time.
    """
    q = ray.remote(Router).remote()
    await q.setup.remote("")

    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters == self.release_on:
                # The last expected caller releases everybody else.
                self.event.set()
            else:
                await self.event.wait()

    barrier = Barrier.remote(release_on=10)

    async def wait_and_go(*args, **kwargs):
        await barrier.wait.remote()
        return "done!"

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    config = BackendConfig(
        max_concurrent_queries=10,
        internal_metadata=BackendMetadata(is_blocking=False))

    worker = setup_worker(CONSUMER_NAME, wait_and_go, backend_config=config)
    await q.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)
    await q.set_backend_config.remote(CONSUMER_NAME, config)
    # Await the traffic update, like the other router setup calls, so that a
    # failure surfaces here and the policy is applied before requests are
    # enqueued.
    await q.set_traffic.remote(PRODUCER_NAME,
                               TrafficPolicy({CONSUMER_NAME: 1.0}))

    query_param = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)

    done, not_done = await asyncio.wait(
        [q.enqueue_request.remote(query_param) for _ in range(10)], timeout=10)
    assert len(done) == 10
    for item in done:
        # Without the assert the comparison result was silently discarded.
        assert await item == "done!"
Beispiel #11
0
async def test_task_runner_perform_async(serve_instance, router,
                                         mock_controller_with_name):
    """Check that a non-blocking backend serves ten queries concurrently.

    Every query waits on a shared barrier that opens only once ten callers
    have arrived, so all ten can finish within the timeout only if they are
    in flight at the same time.
    """

    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters != self.release_on:
                await self.event.wait()
                return
            # The last expected caller releases everybody else.
            self.event.set()

    barrier = Barrier.remote(release_on=10)

    async def wait_and_go(*args, **kwargs):
        await barrier.wait.remote()
        return "done!"

    non_blocking_metadata = BackendMetadata(is_blocking=False)
    config = BackendConfig(
        max_concurrent_queries=10, internal_metadata=non_blocking_metadata)

    await add_servable_to_router(
        wait_and_go,
        router,
        mock_controller_with_name[0],
        backend_config=config)

    query_param = make_request_param()

    pending_results = []
    for _ in range(10):
        pending_results.append(
            await router.assign_request.remote(query_param))

    done, _unfinished = await asyncio.wait(pending_results, timeout=10)
    assert len(done) == 10
    for finished in done:
        assert await finished == "done!"
Beispiel #12
0
    def create_backend(self,
                       backend_tag: str,
                       func_or_class: Union[Callable, Type[Callable]],
                       *actor_init_args: Any,
                       ray_actor_options: Optional[Dict] = None,
                       config: Optional[Union[BackendConfig,
                                              Dict[str, Any]]] = None,
                       env: Optional[CondaEnv] = None) -> None:
        """Create a backend with the provided tag.

        The backend will serve requests with func_or_class.

        Args:
            backend_tag (str): a unique tag assigned to identify this backend.
            func_or_class (callable, class): a function or a class implementing
                __call__.
            actor_init_args (optional): the arguments to pass to the class
                initialization method.
            ray_actor_options (optional): options to be passed into the
                @ray.remote decorator for the backend actor. The caller's
                dictionary is not modified; a copy is used.
            config (dict, serve.BackendConfig, optional): configuration options
                for this backend. Either a BackendConfig, or a dictionary
                mapping strings to values for the following supported options:
                - "num_replicas": number of processes to start up that
                will handle requests to this backend.
                - "max_batch_size": the maximum number of requests that will
                be processed in one batch by this backend.
                - "batch_wait_timeout": time in seconds that backend replicas
                will wait for a full batch of requests before processing a
                partial batch.
                - "max_concurrent_queries": the maximum number of queries that
                will be sent to a replica of this backend without receiving a
                response.
                - "user_config" (experimental): Arguments to pass to the
                reconfigure method of the backend. The reconfigure method is
                called if "user_config" is not None.
            env (serve.CondaEnv, optional): conda environment to run this
                backend in.  Requires the caller to be running in an activated
                conda environment (not necessarily ``env``), and requires
                ``env`` to be an existing conda environment on all nodes.  If
                ``env`` is not provided but conda is activated, the backend
                will run in the conda environment of the caller.

        Raises:
            ValueError: if ``backend_tag`` is already registered.
            TypeError: if ``config`` is neither a BackendConfig nor a
                dictionary.
        """
        if backend_tag in self.list_backends():
            raise ValueError(
                "Cannot create backend. "
                "Backend '{}' is already registered.".format(backend_tag))

        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}
        elif isinstance(ray_actor_options, dict):
            # Copy so that the PYTHONHOME override below does not leak into a
            # dictionary the caller may reuse for other backends.
            ray_actor_options = dict(ray_actor_options)

        if env is None:
            # If conda is activated, default to conda env of this process.
            python_home = os.environ.get("CONDA_PREFIX")
            set_python_home = bool(python_home)
        else:
            python_home = get_conda_env_dir(env.name)
            set_python_home = True

        if set_python_home:
            # Merge into a copy of the caller-supplied variables in both
            # cases, so an explicit ``env`` no longer discards the caller's
            # other environment overrides.
            env_overrides = dict(
                ray_actor_options.get("override_environment_variables") or {})
            env_overrides["PYTHONHOME"] = python_home
            ray_actor_options["override_environment_variables"] = env_overrides

        replica_config = ReplicaConfig(func_or_class,
                                       *actor_init_args,
                                       ray_actor_options=ray_actor_options)
        metadata = BackendMetadata(
            accepts_batches=replica_config.accepts_batches,
            is_blocking=replica_config.is_blocking)

        if isinstance(config, dict):
            backend_config = BackendConfig.parse_obj({
                **config, "internal_metadata":
                metadata
            })
        elif isinstance(config, BackendConfig):
            backend_config = config.copy(
                update={"internal_metadata": metadata})
        else:
            raise TypeError("config must be a BackendConfig or a dictionary.")

        backend_config._validate_complete()
        ray.get(
            self._controller.create_backend.remote(backend_tag, backend_config,
                                                   replica_config))
Beispiel #13
0
Datei: api.py Projekt: yynst2/ray
    def create_backend(
            self,
            backend_tag: str,
            func_or_class: Union[Callable, Type[Callable]],
            *actor_init_args: Any,
            ray_actor_options: Optional[Dict] = None,
            config: Optional[Union[BackendConfig, Dict[str,
                                                       Any]]] = None) -> None:
        """Register a new backend under ``backend_tag``.

        Requests routed to the backend are served by ``func_or_class``.

        Args:
            backend_tag (str): unique tag identifying this backend.
            func_or_class (callable, class): a function, or a class that
                implements __call__.
            actor_init_args (optional): arguments for the class
                initialization method.
            ray_actor_options (optional): options handed to the @ray.remote
                decorator of the backend actor.
            config (dict, serve.BackendConfig, optional): backend
                configuration, given either as a BackendConfig or as a
                dictionary with any of these keys:
                - "num_replicas": number of worker processes started to
                handle requests to this backend.
                - "max_batch_size": largest number of requests processed in
                one batch by this backend.
                - "batch_wait_timeout": seconds a replica waits for a full
                batch before processing a partial one.
                - "max_concurrent_queries": largest number of queries sent to
                a replica without a response having been received.

        Raises:
            ValueError: if ``backend_tag`` is already registered.
            TypeError: if ``config`` is neither a BackendConfig nor a
                dictionary.
        """
        already_registered = backend_tag in self.list_backends()
        if already_registered:
            message = ("Cannot create backend. "
                       "Backend '{}' is already registered.")
            raise ValueError(message.format(backend_tag))

        requested_config = {} if config is None else config

        replica_config = ReplicaConfig(func_or_class,
                                       *actor_init_args,
                                       ray_actor_options=ray_actor_options)
        # Internal metadata always comes from the replica config, whatever
        # the caller supplied.
        metadata = BackendMetadata(
            accepts_batches=replica_config.accepts_batches,
            is_blocking=replica_config.is_blocking)
        metadata_override = {"internal_metadata": metadata}

        if isinstance(requested_config, dict):
            merged_fields = {**requested_config, **metadata_override}
            backend_config = BackendConfig.parse_obj(merged_fields)
        elif isinstance(requested_config, BackendConfig):
            backend_config = requested_config.copy(update=metadata_override)
        else:
            raise TypeError("config must be a BackendConfig or a dictionary.")

        backend_config._validate_complete()
        creation_ref = self._controller.create_backend.remote(
            backend_tag, backend_config, replica_config)
        ray.get(creation_ref)
Beispiel #14
0
    def create_backend(
            self,
            backend_tag: str,
            backend_def: Union[Callable, Type[Callable], str],
            *init_args: Any,
            ray_actor_options: Optional[Dict] = None,
            config: Optional[Union[BackendConfig, Dict[str, Any]]] = None
    ) -> None:
        """Create a backend with the provided tag.

        Args:
            backend_tag (str): a unique tag assigned to identify this backend.
            backend_def (callable, class, str): a function or class
                implementing __call__ and returning a JSON-serializable object
                or a Starlette Response object. A string import path can also
                be provided (e.g., "my_module.MyClass"), in which case the
                underlying function or class will be imported dynamically in
                the worker replicas.
            *init_args (optional): the arguments to pass to the class
                initialization method. Not valid if backend_def is a function.
            ray_actor_options (optional): options to be passed into the
                @ray.remote decorator for the backend actor. The caller's
                dictionary is not modified; a copy is used.
            config (dict, serve.BackendConfig, optional): configuration options
                for this backend. Either a BackendConfig, or a dictionary
                mapping strings to values for the following supported options:
                - "num_replicas": number of processes to start up that
                will handle requests to this backend.
                - "max_batch_size": the maximum number of requests that will
                be processed in one batch by this backend.
                - "batch_wait_timeout": time in seconds that backend replicas
                will wait for a full batch of requests before processing a
                partial batch.
                - "max_concurrent_queries": the maximum number of queries that
                will be sent to a replica of this backend without receiving a
                response.
                - "user_config" (experimental): Arguments to pass to the
                reconfigure method of the backend. The reconfigure method is
                called if "user_config" is not None.

        Raises:
            ValueError: if ``backend_tag`` is already registered.
            TypeError: if ``config`` is neither a BackendConfig nor a
                dictionary.
        """
        if backend_tag in self.list_backends(_internal=True):
            raise ValueError(
                "Cannot create backend. "
                "Backend '{}' is already registered.".format(backend_tag))

        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}
        elif isinstance(ray_actor_options, dict):
            # Copy so that the conda default injected below does not leak
            # into a dictionary the caller may reuse for other backends.
            ray_actor_options = dict(ray_actor_options)

        # If conda is activated and a conda env is not specified in runtime_env
        # in ray_actor_options, default to conda env of this process (client).
        # Without this code, the backend would run in the controller's conda
        # env, which is likely different from that of the client.
        # If using Ray client, skip this convenience feature because the local
        # client env doesn't create the Ray cluster (so the client env is
        # likely not present on the cluster.)
        if not ray.util.client.ray.is_connected():
            runtime_env = ray_actor_options.get("runtime_env")
            if runtime_env is None:
                runtime_env = {}
            elif isinstance(runtime_env, dict):
                # The nested mapping is copied for the same reason.
                runtime_env = dict(runtime_env)
            ray_actor_options["runtime_env"] = runtime_env
            if runtime_env.get("conda") is None:
                current_env = os.environ.get("CONDA_DEFAULT_ENV")
                if current_env:
                    runtime_env["conda"] = current_env

        replica_config = ReplicaConfig(
            backend_def, *init_args, ray_actor_options=ray_actor_options)
        metadata = BackendMetadata(
            accepts_batches=replica_config.accepts_batches,
            is_blocking=replica_config.is_blocking)

        if isinstance(config, dict):
            backend_config = BackendConfig.parse_obj({
                **config, "internal_metadata": metadata
            })
        elif isinstance(config, BackendConfig):
            backend_config = config.copy(
                update={"internal_metadata": metadata})
        else:
            raise TypeError("config must be a BackendConfig or a dictionary.")

        backend_config._validate_complete()
        self._wait_for_goal(
            self._controller.create_backend.remote(backend_tag, backend_config,
                                                   replica_config))
Beispiel #15
0
async def test_task_runner_custom_method_batch(serve_instance):
    """Check per-method batching and the handling of malformed results.

    Requests for methods "a" and "b" are enqueued before the worker is
    added and must come back as "a-0", "a-1", "b-0" and "b-1". A result of
    the wrong length and a non-iterable result must raise
    RayServeException; a numpy array result must yield np.int32 items.
    """
    router_actor = ray.remote(Router).remote()
    await router_actor.setup.remote("")

    @serve.accept_batch
    class Batcher:
        def a(self, _):
            size = serve.context.batch_size
            return list(map("a-{}".format, range(size)))

        def b(self, _):
            size = serve.context.batch_size
            return list(map("b-{}".format, range(size)))

        def error_different_size(self, _):
            # Twice as many results as requests.
            doubled_length = serve.context.batch_size * 2
            return [""] * doubled_length

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, _):
            ones = np.array([1] * serve.context.batch_size)
            return ones.astype(np.int32)

    consumer = "runner"
    producer = "producer"

    batching_metadata = BackendMetadata(accepts_batches=True)
    backend_config = BackendConfig(
        max_batch_size=4,
        batch_wait_timeout=2,
        internal_metadata=batching_metadata)
    worker = setup_worker(consumer, Batcher, backend_config=backend_config)

    traffic_policy = TrafficPolicy({consumer: 1.0})
    await router_actor.set_traffic.remote(producer, traffic_policy)
    await router_actor.set_backend_config.remote(consumer, backend_config)

    def make_request_param(call_method):
        return RequestMetadata(producer,
                               context.TaskContext.Python,
                               call_method=call_method)

    a_query_param = make_request_param("a")
    b_query_param = make_request_param("b")

    # Enqueue a, a, b, b before any worker exists.
    pending_results = []
    for query_param in (a_query_param, a_query_param, b_query_param,
                        b_query_param):
        pending_results.append(
            router_actor.enqueue_request.remote(query_param))

    await router_actor.add_new_worker.remote(consumer, "replica1", worker)

    gathered = await asyncio.gather(*pending_results)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}

    # Each failing method is paired with the message fragment it must produce.
    failing_cases = (
        ("error_different_size", "doesn't preserve batch size"),
        ("error_non_iterable", "iterable"),
    )
    for method_name, expected_message in failing_cases:
        with pytest.raises(RayServeException, match=expected_message):
            failing_request = make_request_param(method_name)
            await router_actor.enqueue_request.remote(failing_request)

    np_request = make_request_param("return_np_array")
    result_np_value = await router_actor.enqueue_request.remote(np_request)
    assert isinstance(result_np_value, np.int32)