Example #1
async def test_router_use_max_concurrency(serve_instance):
    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    BACKEND_NAME = "max-concurrent-test"
    config = BackendConfig({"max_concurrent_queries": 1})
    await q.set_traffic.remote("svc", TrafficPolicy({BACKEND_NAME: 1.0}))
    await q.add_new_worker.remote(BACKEND_NAME, "replica-tag", worker)
    await q.set_backend_config.remote(BACKEND_NAME, config)

    # We send over two queries
    first_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)
    second_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)

    # Neither query should be ready yet
    with pytest.raises(ray.exceptions.RayTimeoutError):
        ray.get([first_query, second_query], timeout=0.2)

    # Let's retrieve the router internal state
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should be just one inflight request
    assert queries_counter["max-concurrent-test:replica-tag"] == 1
    # The second query is buffered
    assert len(backend_queues["max-concurrent-test"]) == 1

    # Let's unblock the first query
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # The internal state of router should have changed.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should still be one inflight request
    assert queries_counter["max-concurrent-test:replica-tag"] == 1
    # But there shouldn't be any queries in the queue
    assert len(backend_queues["max-concurrent-test"]) == 0

    # Unblocking the second query
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Checking the internal state of the router one more time
    queries_counter, backend_queues = await q.get_queues.remote()
    assert queries_counter["max-concurrent-test:replica-tag"] == 0
    assert len(backend_queues["max-concurrent-test"]) == 0
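Several of these tests coordinate through a SignalActor helper (Ray ships one in its test utilities). A minimal sketch of the behavior the wait()/send(clear=True) calls above assume, not the exact shipped implementation:

import asyncio

import ray

@ray.remote(num_cpus=0)
class SignalActor:
    def __init__(self):
        self.ready_event = asyncio.Event()

    def send(self, clear=False):
        # Wake everything currently blocked in wait().
        self.ready_event.set()
        if clear:
            # Re-arm the event so later wait() calls block again.
            self.ready_event.clear()

    async def wait(self):
        await self.ready_event.wait()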
Example #2
def test_worker_replica_failure(serve_instance):
    @ray.remote
    class Counter:
        def __init__(self):
            self.count = 0

        def inc_and_get(self):
            self.count += 1
            return self.count

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, counter):
            self.should_hang = False
            self.index = ray.get(counter.inc_and_get.remote())
            if self.index > 2:
                while True:
                    pass

        def __call__(self, *args):
            return self.index

    counter = Counter.remote()
    serve.create_backend("replica_failure", Worker, counter)
    serve.update_backend_config(
        "replica_failure", BackendConfig(num_replicas=2))
    serve.create_endpoint(
        "replica_failure", backend="replica_failure", route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    start = time.time()
    while time.time() - start < 30:
        time.sleep(0.1)
        response = request_with_retries("/replica_failure", timeout=1).text
        assert response in ["1", "2"]
        responses.add(response)
        if len(responses) > 1:
            break
    else:
        raise TimeoutError("Timed out waiting for replicas after 30s.")

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
Example #3
async def test_task_runner_custom_method_batch(serve_instance):
    q = RoundRobinPolicyQueueActor.remote()

    @serve.accept_batch
    class Batcher:
        def a(self, _):
            return ["a-{}".format(i) for i in range(serve.context.batch_size)]

        def b(self, _):
            return ["b-{}".format(i) for i in range(serve.context.batch_size)]

        def error_different_size(self, _):
            return [""] * (serve.context.batch_size * 2)

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, _):
            return np.array([1] * serve.context.batch_size).astype(np.int32)

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    worker = setup_worker(CONSUMER_NAME, Batcher)

    await q.set_traffic.remote(PRODUCER_NAME, {CONSUMER_NAME: 1.0})
    await q.set_backend_config.remote(
        CONSUMER_NAME,
        BackendConfig({
            "max_batch_size": 10
        }, accepts_batches=True))

    def make_request_param(call_method):
        return RequestMetadata(
            PRODUCER_NAME, context.TaskContext.Python, call_method=call_method)

    a_query_param = make_request_param("a")
    b_query_param = make_request_param("b")

    futures = [q.enqueue_request.remote(a_query_param) for _ in range(2)]
    futures += [q.enqueue_request.remote(b_query_param) for _ in range(2)]

    await q.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)

    gathered = await asyncio.gather(*futures)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}

    with pytest.raises(RayServeException, match="doesn't preserve batch size"):
        different_size = make_request_param("error_different_size")
        await q.enqueue_request.remote(different_size)

    with pytest.raises(RayServeException, match="iterable"):
        non_iterable = make_request_param("error_non_iterable")
        await q.enqueue_request.remote(non_iterable)

    np_array = make_request_param("return_np_array")
    result_np_value = await q.enqueue_request.remote(np_array)
    assert isinstance(result_np_value, np.int32)
Example #4
def test_worker_replica_failure(serve_instance):
    client = serve_instance

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            if not os.path.exists(path):
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())

                with open(path, "w") as f:
                    if num == 2:
                        self.should_hang = True
                    else:
                        f.write(str(num + 1))

            if self.should_hang:
                while True:
                    pass

        def __call__(self, *args):
            pass

    temp_path = os.path.join(tempfile.gettempdir(),
                             serve.utils.get_random_letters())
    client.create_backend("replica_failure", Worker, temp_path)
    client.update_backend_config("replica_failure",
                                 BackendConfig(num_replicas=2))
    client.create_endpoint("replica_failure",
                           backend="replica_failure",
                           route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    while len(responses) < 2:
        responses.add(request_with_retries("/replica_failure", timeout=1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles(client, "replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
Example #5
File: api.py Project: whitezou/ray
    def deploy(self,
               name: str,
               backend_def: Union[Callable, Type[Callable], str],
               *init_args: Any,
               ray_actor_options: Optional[Dict] = None,
               config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
               version: Optional[str] = None,
               _blocking: Optional[bool] = True) -> Optional[GoalId]:
        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}

        # If conda is activated and a conda env is not specified in runtime_env
        # in ray_actor_options, default to conda env of this process (client).
        # Without this code, the backend would run in the controller's conda
        # env, which is likely different from that of the client.
        # If using Ray client, skip this convenience feature because the local
        # client env doesn't create the Ray cluster (so the client env is
        # likely not present on the cluster).
        if not ray.util.client.ray.is_connected():
            if ray_actor_options.get("runtime_env") is None:
                ray_actor_options["runtime_env"] = {}
            if ray_actor_options["runtime_env"].get("conda") is None:
                current_env = os.environ.get("CONDA_DEFAULT_ENV")
                if current_env is not None and current_env != "":
                    ray_actor_options["runtime_env"]["conda"] = current_env

        replica_config = ReplicaConfig(backend_def,
                                       *init_args,
                                       ray_actor_options=ray_actor_options)
        metadata = BackendMetadata(
            accepts_batches=replica_config.accepts_batches,
            is_blocking=replica_config.is_blocking,
            is_asgi_app=replica_config.is_asgi_app,
            path_prefix=replica_config.path_prefix,
        )

        if isinstance(config, dict):
            backend_config = BackendConfig.parse_obj({
                **config, "internal_metadata":
                metadata
            })
        elif isinstance(config, BackendConfig):
            backend_config = config.copy(
                update={"internal_metadata": metadata})
        else:
            raise TypeError("config must be a BackendConfig or a dictionary.")

        backend_config._validate_complete()
        goal_ref = self._controller.deploy.remote(name, backend_config,
                                                  replica_config, version)

        if _blocking:
            self._wait_for_goal(goal_ref)
        else:
            return goal_ref
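For context, a hedged usage sketch of deploy(); the backend class, its init arg, and the deployment name are illustrative, and config accepts either a dict or a BackendConfig (as the type check above shows):

from ray import serve
from ray.serve.config import BackendConfig

class MyModel:
    def __init__(self, weights_path):
        self.weights_path = weights_path

    def __call__(self, request):
        return {"loaded_from": self.weights_path}

client = serve.connect()  # assumes a Serve instance is already running
client.deploy(
    "my_model",
    MyModel,
    "/tmp/weights",  # forwarded as *init_args
    config=BackendConfig(num_replicas=2),
    version="v1")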
Example #6
async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    class KeepInflight:
        def __init__(self):
            self.events = []

        def reconfigure(self, config):
            if config["release"]:
                for event in self.events:
                    event.set()

        async def __call__(self, _):
            e = asyncio.Event()
            self.events.append(e)
            await e.wait()

    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=BackendConfig(
            num_replicas=1,
            internal_metadata=BackendMetadata(is_blocking=False),
            user_config={"release": False}))

    query_param = make_request_param()

    refs = [(await router.assign_request.remote(query_param))
            for _ in range(6)]

    shutdown_ref = backend_worker.drain_pending_queries.remote()

    with pytest.raises(ray.exceptions.GetTimeoutError):
        # Shutdown should block because there are still inflight queries.
        ray.get(shutdown_ref, timeout=2)

    config = BackendConfig()
    config.user_config = {"release": True}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    # All queries should complete successfully
    ray.get(refs)
    # The draining operation should be completed.
    ray.get(shutdown_ref)
Example #7
    async def do_autoscale(self) -> None:
        for backend, info in self.configuration_store.backends.items():
            if backend not in self.autoscaling_policies:
                continue

            new_num_replicas = self.autoscaling_policies[backend].scale(
                self.backend_stats[backend], info.backend_config.num_replicas)
            if new_num_replicas > 0:
                await self.update_backend_config(
                    backend, BackendConfig(num_replicas=new_num_replicas))
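The autoscaling policy interface is only visible here through its call site: scale(stats, current_num_replicas) returns the desired replica count, and the `> 0` guard above means a non-positive return is treated as "no change". A minimal illustrative policy under those assumptions (the stats layout is invented for this sketch):

class QueueLengthPolicy:
    def __init__(self, target_queue_len=2):
        self.target_queue_len = target_queue_len

    def scale(self, backend_stats, curr_num_replicas):
        # Assumed layout for this sketch: {replica_tag: queue_length}.
        if not backend_stats:
            return -1  # non-positive: the caller skips the update
        avg = sum(backend_stats.values()) / len(backend_stats)
        if avg > self.target_queue_len:
            return curr_num_replicas + 1
        if avg < self.target_queue_len / 2 and curr_num_replicas > 1:
            return curr_num_replicas - 1
        return -1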
Example #8
def test_scaling_replicas(serve_instance, use_legacy_config):
    client = serve_instance

    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    config = {
        "num_replicas": 2
    } if use_legacy_config else BackendConfig(num_replicas=2)
    client.create_backend("counter:v1", Counter, config=config)

    client.create_endpoint("counter", backend="counter:v1", route="/increment")

    # Keep checking the routing table until /increment is populated
    while "/increment" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # If the load is shared between the two replicas, the max result cannot be 10.
    assert max(counter_result) < 10

    update_config = {
        "num_replicas": 1
    } if use_legacy_config else BackendConfig(num_replicas=1)
    client.update_backend_config("counter:v1", update_config)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)
    # Give the extra replica some time to spin down; the majority of requests
    # should be served by the single remaining replica.
    assert max(counter_result) - min(counter_result) > 6
Example #9
async def test_user_config_update(serve_instance, mock_controller_with_name):
    class Customizable:
        def __init__(self):
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(num_replicas=2,
                           user_config={
                               "return_val": "original",
                               "b": 2
                           })
    worker, router = await add_servable_to_router(Customizable,
                                                  *mock_controller_with_name,
                                                  backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request(query_param)) for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    async def new_val_returned():
        result = await (await router.assign_request(query_param))
        assert "new_val" == result

    for _ in range(10):
        try:
            await new_val_returned()
        except AssertionError:
            # Wait for the config update to propagate.
            await asyncio.sleep(0.5)
    await new_val_returned()
Example #10
        def add_new_replica(self,
                            backend_tag,
                            runner_actor,
                            backend_config=BackendConfig()):
            self.backend_replicas[backend_tag].append(runner_actor)
            self.backend_configs[backend_tag] = backend_config

            self.host.notify_changed(
                "worker_handles",
                self.backend_replicas,
            )
            self.host.notify_changed("backend_configs", self.backend_configs)
Example #11
async def test_user_config_update(serve_instance, router,
                                  mock_controller_with_name):
    class Customizable:
        def __init__(self):
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2, user_config={
            "return_val": "original",
            "b": 2
        })
    await add_servable_to_router(
        Customizable,
        router,
        mock_controller_with_name[0],
        backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]

    for i in done:
        assert await i == "new_val"
Example #12
        def add_new_replica(self,
                            backend_tag,
                            runner_actor,
                            backend_config=BackendConfig()):
            self.backend_replicas[backend_tag].append(runner_actor)
            self.backend_configs[backend_tag] = backend_config

            self.host.notify_changed(
                LongPollKey.REPLICA_HANDLES,
                self.backend_replicas,
            )
            self.host.notify_changed(LongPollKey.BACKEND_CONFIGS,
                                     self.backend_configs)
Example #13
def test_updating_config(serve_instance, use_legacy_config):
    client = serve_instance

    class BatchSimple:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, requests):
            return [1] * len(requests)

    config = {
        "max_batch_size": 2,
        "num_replicas": 3
    } if use_legacy_config else BackendConfig(
        max_batch_size=2, num_replicas=3)
    client.create_backend("bsimple:v1", BatchSimple, config=config)
    client.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    controller = client._controller
    old_replica_tag_list = ray.get(
        controller._list_replicas.remote("bsimple:v1"))

    update_config = {
        "max_batch_size": 5
    } if use_legacy_config else BackendConfig(max_batch_size=5)
    client.update_backend_config("bsimple:v1", update_config)
    new_replica_tag_list = ray.get(
        controller._list_replicas.remote("bsimple:v1"))
    new_all_tag_list = []
    for worker_dict in ray.get(
            controller.get_all_worker_handles.remote()).values():
        new_all_tag_list.extend(list(worker_dict.keys()))

    # The old and new replica tag lists should be identical, and both should
    # be a subset of the list of all tags.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)
Example #14
def test_serve_graceful_shutdown(serve_instance):
    client = serve_instance

    signal = SignalActor.remote()

    class WaitBackend:
        @serve.accept_batch
        async def __call__(self, requests):
            signal_actor = await requests[0].body()
            await signal_actor.wait.remote()
            return ["" for _ in range(len(requests))]

    client.create_backend(
        "wait",
        WaitBackend,
        config=BackendConfig(
            # Make sure we can queue up queries on the replica side.
            max_concurrent_queries=10,
            max_batch_size=1,
            experimental_graceful_shutdown_wait_loop_s=0.5,
            experimental_graceful_shutdown_timeout_s=1000,
        ))
    client.create_endpoint("wait", backend="wait")
    handle = client.get_handle("wait")
    refs = [handle.remote(signal) for _ in range(10)]

    # Wait for all the queries to be enqueued
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(refs, timeout=1)

    @ray.remote(num_cpus=0)
    def do_blocking_delete():
        client = serve.connect()
        client.delete_endpoint("wait")
        client.delete_backend("wait")

    # Now delete the backend. This should trigger the shutdown sequence.
    delete_ref = do_blocking_delete.remote()

    # The queries should be enqueued but not executed because they are
    # blocked by the signal actor.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(refs, timeout=1)

    signal.send.remote()

    # All the queries should be drained and executed without error.
    ray.get(refs)
    # Blocking delete should complete.
    ray.get(delete_ref)
Example #15
    async def update_backend_config(self, backend_tag: BackendTag,
                                    config_options: BackendConfig) -> GoalId:
        """Set the config for the specified backend."""
        async with self.write_lock:
            existing_backend_info = self.backend_state.get_backend(backend_tag)
            if existing_backend_info is None:
                raise ValueError(f"Backend {backend_tag} is not registered.")

            existing_replica_config = existing_backend_info.replica_config
            new_backend_config = existing_backend_info.backend_config.copy(
                update=config_options.dict(exclude_unset=True))

            return self.backend_state.deploy_backend(
                backend_tag, new_backend_config, existing_replica_config)
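The copy(update=config_options.dict(exclude_unset=True)) idiom merges only the fields the caller explicitly set, leaving every other option on the existing config untouched. A small illustration, assuming the pydantic-based BackendConfig from ray.serve.config:

from ray.serve.config import BackendConfig

existing = BackendConfig(num_replicas=3, max_concurrent_queries=8)
override = BackendConfig(num_replicas=5)  # only num_replicas explicitly set

merged = existing.copy(update=override.dict(exclude_unset=True))
assert merged.num_replicas == 5
assert merged.max_concurrent_queries == 8  # preserved from the old config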
Example #16
        async def __init__(self, backend_tag, replica_tag, init_args,
                           init_kwargs, backend_config_proto_bytes: bytes,
                           version: BackendVersion, controller_name: str,
                           detached: bool):
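            # Note: `serialized_backend_def` is not a parameter here; it is
            # captured from the enclosing create_replica_wrapper(name,
            # serialized_backend_def) scope (see Example #23).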
            backend = cloudpickle.loads(serialized_backend_def)
            backend_config = BackendConfig.from_proto_bytes(
                backend_config_proto_bytes)

            if inspect.isfunction(backend):
                is_function = True
            elif inspect.isclass(backend):
                is_function = False
            else:
                assert False, ("backend_def must be function, class, or "
                               "corresponding import path.")

            # Set the controller name so that serve.connect() in the user's
            # backend code will connect to the instance that this backend is
            # running in.
            ray.serve.api._set_internal_replica_context(backend_tag,
                                                        replica_tag,
                                                        controller_name,
                                                        servable_object=None)
            if is_function:
                _callable = backend
            else:
                # This allows backends to define an async __init__ method
                # (required for FastAPI backend definition).
                _callable = backend.__new__(backend)
                await sync_to_async(_callable.__init__)(*init_args,
                                                        **init_kwargs)
            # Setting the context again to update the servable_object.
            ray.serve.api._set_internal_replica_context(
                backend_tag,
                replica_tag,
                controller_name,
                servable_object=_callable)

            assert controller_name, "Must provide a valid controller_name"
            controller_namespace = ray.serve.api._get_controller_namespace(
                detached)
            controller_handle = ray.get_actor(controller_name,
                                              namespace=controller_namespace)
            self.backend = RayServeReplica(_callable, backend_tag, replica_tag,
                                           backend_config,
                                           backend_config.user_config, version,
                                           is_function, controller_handle)

            # asyncio.Event used to signal that the replica is shutting down.
            self.shutdown_event = asyncio.Event()
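The __new__-then-await trick above is plain Python and worth isolating. A standalone sketch (the class and helper names are illustrative; sync_to_async from the original is approximated with an awaitable check):

import asyncio
import inspect

class ModelBackend:
    async def __init__(self):
        # Stand-in for async setup work, e.g. fetching weights.
        await asyncio.sleep(0)
        self.ready = True

async def construct(cls, *args, **kwargs):
    # Allocate without running __init__, then await the initializer by hand.
    # Calling ModelBackend() directly would raise TypeError because its
    # __init__ returns a coroutine instead of None.
    obj = cls.__new__(cls)
    result = obj.__init__(*args, **kwargs)
    if inspect.isawaitable(result):
        await result
    return obj

backend = asyncio.run(construct(ModelBackend))
assert backend.ready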
Example #17
def test_backend_user_config(serve_instance):
    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})

    @serve.deployment("counter", config=config)
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    Counter.deploy()
    handle = Counter.get_handle()

    def check(val, num_replicas):
        pids_seen = set()
        for i in range(100):
            result = ray.get(handle.remote())
            if str(result[0]) != val:
                return False
            pids_seen.add(result[1])
        return len(pids_seen) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    config.num_replicas = 3
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("123", 3))

    config.user_config = {"count": 456}
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("456", 3))
Example #18
File: api.py Project: thalvari/ray
def create_backend(backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Dict[str, Any]] = None) -> None:
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag used to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class's
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config (optional): configuration options for this backend.
            Supported options:
            - "num_replicas": number of worker processes to start up that will
            handle requests to this backend.
            - "max_batch_size": the maximum number of requests that will
            be processed in one batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
            will wait for a full batch of requests before processing a
            partial batch.
            - "max_concurrent_queries": the maximum number of queries that will
            be sent to a replica of this backend without receiving a
            response.
    """
    if backend_tag in list_backends():
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    if config is None:
        config = {}
    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(func_or_class,
                                   *actor_init_args,
                                   ray_actor_options=ray_actor_options)
    backend_config = BackendConfig(config, replica_config.accepts_batches,
                                   replica_config.is_blocking)

    ray.get(
        controller.create_backend.remote(backend_tag, backend_config,
                                         replica_config))
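A hedged usage sketch for this dict-config API; the backend tag and function are illustrative, and max_batch_size is omitted because, per the validation test in the next example, it requires a backend that accepts batches:

from ray import serve

def echo(flask_request):
    return "hello"

serve.create_backend(
    "echo:v1",
    echo,
    config={
        "num_replicas": 2,
        "max_concurrent_queries": 8,
    })
serve.create_endpoint("echo", backend="echo:v1", route="/echo")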
Example #19
def test_backend_config_validation():
    # Test unknown key.
    with pytest.raises(ValueError, match="unknown_key"):
        BackendConfig({"unknown_key": -1})

    # Test num_replicas validation.
    BackendConfig({"num_replicas": 1})
    with pytest.raises(TypeError):
        BackendConfig({"num_replicas": "hello"})
    with pytest.raises(ValueError):
        BackendConfig({"num_replicas": -1})

    # Test max_batch_size validation.
    BackendConfig({"max_batch_size": 10}, accepts_batches=True)
    with pytest.raises(ValueError):
        BackendConfig({"max_batch_size": 10}, accepts_batches=False)
    with pytest.raises(TypeError):
        BackendConfig({"max_batch_size": 1.0})
    with pytest.raises(TypeError):
        BackendConfig({"max_batch_size": "hello"})
    with pytest.raises(ValueError):
        BackendConfig({"max_batch_size": 0})
    with pytest.raises(ValueError):
        BackendConfig({"max_batch_size": -1})
Example #20
def test_imported_backend(serve_instance):
    client = serve_instance

    backend_class = ImportedBackend("ray.serve.utils.MockImportedBackend")
    config = BackendConfig(user_config="config", max_batch_size=2)
    client.create_backend(
        "imported", backend_class, "input_arg", config=config)
    client.create_endpoint("imported", backend="imported")

    # Basic sanity check.
    handle = client.get_handle("imported")
    assert ray.get(handle.remote()) == {"arg": "input_arg", "config": "config"}

    # Check that updating backend config works.
    client.update_backend_config(
        "imported", BackendConfig(user_config="new_config"))
    assert ray.get(handle.remote()) == {
        "arg": "input_arg",
        "config": "new_config"
    }

    # Check that other call methods work.
    handle = handle.options(method_name="other_method")
    assert ray.get(handle.remote("hello")) == "hello"
Example #21
def test_list_backends(serve_instance):
    def f():
        pass

    config1 = BackendConfig(max_concurrent_queries=10)
    serve.create_backend("backend", f, config=config1)
    backends = serve.list_backends()
    assert len(backends) == 1
    assert "backend" in backends
    assert backends["backend"].max_concurrent_queries == 10

    config2 = BackendConfig(num_replicas=10)
    serve.create_backend("backend2", f, config=config2)
    backends = serve.list_backends()
    assert len(backends) == 2
    assert backends["backend2"].num_replicas == 10

    serve.delete_backend("backend")
    backends = serve.list_backends()
    assert len(backends) == 1
    assert "backend2" in backends

    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
Example #22
async def test_task_runner_perform_batch(serve_instance, router):
    def batcher(requests):
        batch_size = len(requests)
        return [batch_size] * batch_size

    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))

    _ = await add_servable_to_router(batcher, router, backend_config=config)

    query_param = make_request_param()
    my_batch_sizes = await asyncio.gather(
        *[router.enqueue_request.remote(query_param) for _ in range(3)])
    assert my_batch_sizes == [2, 2, 1]
Example #23
    def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
    ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        backend_config = BackendConfig.from_proto_bytes(
            backend_config_proto_bytes)

        if prev_version is not None:
            existing_backend_info = self.backend_state_manager.get_backend(
                name)
            if (existing_backend_info is None
                    or not existing_backend_info.version):
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing_backend_info.version != prev_version:
                raise ValueError(f"prev_version '{prev_version}' "
                                 "does not match with the existing "
                                 f"version '{existing_backend_info.version}'.")
        backend_info = BackendInfo(actor_def=ray.remote(
            create_replica_wrapper(name,
                                   replica_config.serialized_backend_def)),
                                   version=version,
                                   backend_config=backend_config,
                                   replica_config=replica_config,
                                   deployer_job_id=deployer_job_id,
                                   start_time_ms=int(time.time() * 1000))
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # This is probably not the desired behavior for an autoscaling
        # deployment, which redeploys very often to change num_replicas.

        goal_id, updating = self.backend_state_manager.deploy_backend(
            name, backend_info)
        endpoint_info = EndpointInfo(route=route_prefix)
        self.endpoint_state.update_endpoint(name, endpoint_info)
        return goal_id, updating
Example #24
    async def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        python_methods: List[str],
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
    ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        backend_config = BackendConfig.from_proto_bytes(
            backend_config_proto_bytes)

        async with self.write_lock:
            if prev_version is not None:
                existing_backend_info = self.backend_state_manager.get_backend(
                    name)
                if (existing_backend_info is None
                        or not existing_backend_info.version):
                    raise ValueError(
                        f"prev_version '{prev_version}' is specified but "
                        "there is no existing deployment.")
                if existing_backend_info.version != prev_version:
                    raise ValueError(
                        f"prev_version '{prev_version}' "
                        "does not match with the existing "
                        f"version '{existing_backend_info.version}'.")
            backend_info = BackendInfo(actor_def=ray.remote(
                create_backend_replica(name,
                                       replica_config.serialized_backend_def)),
                                       version=version,
                                       backend_config=backend_config,
                                       replica_config=replica_config,
                                       deployer_job_id=deployer_job_id,
                                       start_time_ms=int(time.time() * 1000))

            goal_id, updating = self.backend_state_manager.deploy_backend(
                name, backend_info)
            endpoint_info = EndpointInfo(route=route_prefix,
                                         python_methods=python_methods)
            self.endpoint_state.update_endpoint(name, endpoint_info)
            return goal_id, updating
Example #25
def test_serve_forceful_shutdown(serve_instance):
    def sleeper(_):
        while True:
            time.sleep(1000)

    serve.create_backend(
        "sleeper",
        sleeper,
        config=BackendConfig(experimental_graceful_shutdown_timeout_s=1))
    serve.create_endpoint("sleeper", backend="sleeper")
    handle = serve.get_handle("sleeper")
    ref = handle.remote()
    serve.delete_endpoint("sleeper")
    serve.delete_backend("sleeper")

    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(ref)
Example #26
def test_batching_exception(serve_instance):
    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, requests):
            return len(requests)

    # Set the max batch size.
    config = BackendConfig(max_batch_size=5)
    serve.create_backend("exception:v1", NoListReturned, config=config)
    serve.create_endpoint("exception-test", backend="exception:v1")

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))
Example #27
    async def update_backend_config(self, backend_tag: BackendTag,
                                    config_options: BackendConfig) -> GoalId:
        """Set the config for the specified backend."""
        async with self.write_lock:
            existing_info = self.backend_state.get_backend(backend_tag)
            if existing_info is None:
                raise ValueError(f"Backend {backend_tag} is not registered.")

            backend_info = BackendInfo(
                actor_def=existing_info.actor_def,
                version=existing_info.version,
                backend_config=existing_info.backend_config.copy(
                    update=config_options.dict(exclude_unset=True)),
                replica_config=existing_info.replica_config)
            goal_id, _ = self.backend_state.deploy_backend(
                backend_tag, backend_info)
            return goal_id
Example #28
        def add_new_replica(self,
                            backend_tag,
                            runner_actor,
                            backend_config=BackendConfig()):
            self.backend_replicas[backend_tag].append(runner_actor)
            self.backend_configs[backend_tag] = backend_config

            ray.get(runner_actor.reconfigure.remote(
                backend_config.user_config))

            self.host.notify_changed(
                (LongPollNamespace.REPLICA_HANDLES, backend_tag),
                self.backend_replicas[backend_tag],
            )
            self.host.notify_changed(
                (LongPollNamespace.BACKEND_CONFIGS, backend_tag),
                self.backend_configs[backend_tag],
            )
Example #29
async def test_task_runner_perform_batch(serve_instance,
                                         mock_controller_with_name):
    def batcher(requests):
        batch_size = len(requests)
        return [batch_size] * batch_size

    config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))

    worker, router = await add_servable_to_router(batcher,
                                                  *mock_controller_with_name,
                                                  backend_config=config)

    query_param = make_request_param()
    my_batch_sizes = await asyncio.gather(
        *[(await router.assign_request(query_param)) for _ in range(3)])
    assert my_batch_sizes == [2, 2, 1]
Example #30
def test_batching_exception(serve_instance):
    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve.context.batch_size
            return batch_size

    # set the max batch size
    serve.create_backend(
        "exception:v1", NoListReturned, config=BackendConfig(max_batch_size=5))
    serve.create_endpoint(
        "exception-test", backend="exception:v1", route="/noListReturned")

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))