Exemple #1
0
async def test_changing_backend(ray_instance, mock_controller,
                                task_runner_mock_actor):
    """Router should follow traffic-policy changes across backends."""
    router = ray.remote(EndpointRouter).remote(mock_controller, "svc")

    # Route all "svc" traffic to the first backend and send a request.
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({
            "backend-alter": 1
        }))
    await mock_controller.add_new_replica.remote("backend-alter",
                                                 task_runner_mock_actor)
    await (await router.assign_request.remote(
        RequestMetadata(get_random_letters(10), "svc"), 1))
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 1

    # Re-point the traffic at a second backend; the next request must follow.
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({
            "backend-alter-2": 1
        }))
    await mock_controller.add_new_replica.remote("backend-alter-2",
                                                 task_runner_mock_actor)
    await (await router.assign_request.remote(
        RequestMetadata(get_random_letters(10), "svc"), 2))
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 2
Exemple #2
0
def _start_replica(backend_tag):
    """Start a single new replica for the given backend.

    Creates the replica actor through the actor nursery, wires it up to the
    router, and registers it in the backend table and metric monitor.

    Args:
        backend_tag: Identifier of a backend registered in the backend table.
    """
    assert (backend_tag in global_state.backend_table.list_backends()
            ), "Backend {} is not registered.".format(backend_tag)

    # Unique per-replica tag of the form "<backend>#<random suffix>".
    replica_tag = "{}#{}".format(backend_tag, get_random_letters(length=6))

    # get the info which starts the replicas
    creator = global_state.backend_table.get_backend_creator(backend_tag)
    backend_config_dict = global_state.backend_table.get_info(backend_tag)
    backend_config = BackendConfig(**backend_config_dict)
    init_args = global_state.backend_table.get_init_args(backend_tag)

    # get actor creation kwargs
    actor_kwargs = backend_config.get_actor_creation_args(init_args)

    # Create the runner in the nursery
    [runner_handle] = ray.get(
        global_state.actor_nursery_handle.start_actor_with_creator.remote(
            creator, actor_kwargs, replica_tag))

    # Setup the worker
    ray.get(
        runner_handle._ray_serve_setup.remote(
            backend_tag, global_state.init_or_get_router(), runner_handle))
    # NOTE(review): fetch is fire-and-forget (no ray.get) — presumably
    # intentional; confirm callers don't rely on it having completed.
    runner_handle._ray_serve_fetch.remote()

    # Register the worker in config tables as well as metric monitor
    global_state.backend_table.add_replica(backend_tag, replica_tag)
    global_state.init_or_get_metric_monitor().add_target.remote(runner_handle)
Exemple #3
0
    def _scale_replicas(self, backends: Dict[BackendTag, BackendInfo],
                        backend_tag: BackendTag, num_replicas: int) -> None:
        """Scale the given backend to the number of replicas.

        NOTE: this does not actually start or stop the replicas, but instead
        adds the intention to start/stop them to self.workers_to_start and
        self.workers_to_stop. The caller is responsible for then first writing
        a checkpoint and then actually starting/stopping the intended replicas.
        This avoids inconsistencies with starting/stopping a worker and then
        crashing before writing a checkpoint.

        Args:
            backends: All registered backends, keyed by tag.
            backend_tag: The backend to scale.
            num_replicas: Target replica count (must be >= 0).

        Raises:
            RayServeException: if resource checking is enabled and the
                cluster cannot fit the requested number of new replicas.
        """
        logger.debug("Scaling backend '{}' to {} replicas".format(
            backend_tag, num_replicas))
        assert (
            backend_tag
            in backends), "Backend {} is not registered.".format(backend_tag)
        assert num_replicas >= 0, ("Number of replicas must be"
                                   " greater than or equal to 0.")

        current_num_replicas = len(self.replicas[backend_tag])
        delta_num_replicas = num_replicas - current_num_replicas

        backend_info = backends[backend_tag]
        if delta_num_replicas > 0:
            # Check that the cluster can fit the new replicas before
            # committing to starting them.
            can_schedule = try_schedule_resources_on_nodes(requirements=[
                backend_info.replica_config.resource_dict
                for _ in range(delta_num_replicas)
            ])

            if _RESOURCE_CHECK_ENABLED and not all(can_schedule):
                num_possible = sum(can_schedule)
                # Error message grammar/typos fixed ("avaiable",
                # "scaling to replica to").
                raise RayServeException(
                    "Cannot scale backend {} to {} replicas. Ray Serve tried "
                    "to add {} replicas but the resources only allow {} "
                    "to be added. To fix this, consider scaling to {} "
                    "replicas or add more resources to the cluster. You can "
                    "check available resources with ray.nodes().".format(
                        backend_tag, num_replicas, delta_num_replicas,
                        num_possible, current_num_replicas + num_possible))

            logger.debug("Adding {} replicas to backend {}".format(
                delta_num_replicas, backend_tag))
            for _ in range(delta_num_replicas):
                replica_tag = "{}#{}".format(backend_tag, get_random_letters())
                self.replicas_to_start[backend_tag].append(replica_tag)

        elif delta_num_replicas < 0:
            logger.debug("Removing {} replicas from backend '{}'".format(
                -delta_num_replicas, backend_tag))
            # BUG FIX: delta_num_replicas is negative on this branch, so the
            # original comparison (>= delta_num_replicas) was vacuously true.
            # Compare against the positive removal count instead.
            assert len(self.replicas[backend_tag]) >= -delta_num_replicas
            for _ in range(-delta_num_replicas):
                replica_tag = self.replicas[backend_tag].pop()
                if len(self.replicas[backend_tag]) == 0:
                    del self.replicas[backend_tag]

                del self.workers[backend_tag][replica_tag]
                if len(self.workers[backend_tag]) == 0:
                    del self.workers[backend_tag]

                self.replicas_to_stop[backend_tag].append(replica_tag)
Exemple #4
0
def test_replica_name_from_str():
    """Parsing a prefixed actor name recovers the bare replica tag."""
    suffix = get_random_letters()
    prefixed_actor_name = f"{ReplicaName.prefix}DeploymentA#{suffix}"

    parsed = ReplicaName.from_str(prefixed_actor_name)
    expected_tag = prefixed_actor_name.replace(ReplicaName.prefix, "")
    assert str(parsed) == parsed.replica_tag
    assert parsed.replica_tag == expected_tag
Exemple #5
0
    def _set_backend_goal(self, backend_tag: BackendTag,
                          backend_info: Optional[BackendInfo]):
        """Set desirable state for a given backend, identified by tag.

        Args:
            backend_tag (BackendTag): Identifier of a backend.
            backend_info (Optional[BackendInfo]): Contains backend and
                replica config; if passed in as None, we're marking the
                target backend as shutting down.

        Returns:
            Tuple of (new_goal_id, existing_goal_id): the goal created for
            this update and the goal it replaces (None if none existed).
            The previous ``-> None`` annotation was incorrect and was
            removed.
        """
        existing_goal_id = self._backend_goals.get(backend_tag)
        new_goal_id = self._goal_manager.create_goal()

        if backend_info is not None:
            self._backend_metadata[backend_tag] = backend_info
            self._target_replicas[
                backend_tag] = backend_info.backend_config.num_replicas

            # Unversioned backends get a random stand-in version string so
            # version comparison still works.
            if backend_info.version is not None:
                code_version = backend_info.version
            else:
                code_version = get_random_letters()

            self._target_versions[backend_tag] = BackendVersion(
                code_version,
                user_config=backend_info.backend_config.user_config)

        else:
            # Shutting down: target zero replicas.
            self._target_replicas[backend_tag] = 0

        self._backend_goals[backend_tag] = new_goal_id

        return new_goal_id, existing_goal_id
Exemple #6
0
def test_replica_tag_formatting():
    """ReplicaName joins the deployment tag and suffix with a '#'."""
    deployment_tag = "DeploymentA"
    suffix = get_random_letters()
    expected = f"{deployment_tag}#{suffix}"

    name = ReplicaName(deployment_tag, suffix)
    assert name.replica_tag == expected
    assert str(name) == expected
Exemple #7
0
async def test_split_traffic_random(ray_instance, mock_controller,
                                    task_runner_mock_actor):
    """With a 50/50 traffic split, both backends should receive requests."""
    q = ray.remote(Router).remote(mock_controller)
    await q.setup_in_async_loop.remote()

    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({
            "backend-split": 0.5,
            "backend-split-2": 0.5
        }))
    runner_1, runner_2 = [mock_task_runner() for _ in range(2)]
    await mock_controller.add_new_replica.remote("backend-split", runner_1)
    await mock_controller.add_new_replica.remote("backend-split-2", runner_2)

    # assume 50% split, the probability of all 20 requests going to a
    # single queue is 0.5^20 ~ 1e-6
    for _ in range(20):
        await q.assign_request.remote(
            RequestMetadata(get_random_letters(10), "svc", None), 1)

    # Each runner must have received at least one call carrying our payload.
    got_work = [
        await runner.get_recent_call.remote()
        for runner in (runner_1, runner_2)
    ]
    assert [g.args[0] for g in got_work] == [1, 1]
Exemple #8
0
    async def _start_backend_replica(self, backend_tag):
        """Start a new replica actor for the backend and register it.

        Writes the replica to the backend table first, then creates the
        worker actor, waits for it to come up, and registers it with the
        router and metric monitor.

        Args:
            backend_tag: Identifier of a registered backend.
        """
        assert (backend_tag in self.backend_table.list_backends()
                ), "Backend {} is not registered.".format(backend_tag)

        # Unique per-replica tag of the form "<backend>#<random suffix>".
        replica_tag = "{}#{}".format(backend_tag, get_random_letters(length=6))

        # Register the worker in the DB.
        # TODO(edoakes): we should guarantee that if calls to the master
        # succeed, the cluster state has changed and if they fail, it hasn't.
        # Once we have master actor fault tolerance, this breaks that guarantee
        # because this method could fail after writing the replica to the DB.
        self.backend_table.add_replica(backend_tag, replica_tag)

        # Fetch the info to start the replica from the backend table.
        backend_actor = ray.remote(
            self.backend_table.get_backend_creator(backend_tag))
        backend_config_dict = self.backend_table.get_info(backend_tag)
        backend_config = BackendConfig(**backend_config_dict)
        init_args = [
            backend_tag, replica_tag,
            self.backend_table.get_init_args(backend_tag)
        ]
        kwargs = backend_config.get_actor_creation_args(init_args)

        # Start the worker.
        worker_handle = backend_actor._remote(**kwargs)
        self.tag_to_actor_handles[replica_tag] = worker_handle

        # Wait for the worker to start up.
        await worker_handle.ready.remote()
        await self.get_router()[0].add_new_worker.remote(
            backend_tag, worker_handle)

        # Register the worker with the metric monitor.
        self.get_metric_monitor()[0].add_target.remote(worker_handle)
Exemple #9
0
def test_replica_name_from_str():
    """ReplicaName.from_str round-trips a tag back to the same string."""
    replica_suffix = get_random_letters()
    replica_tag = f"DeploymentA#{replica_suffix}"

    replica_name = ReplicaName.from_str(replica_tag)
    assert replica_name.replica_tag == replica_tag
    # BUG FIX: the original asserted str(replica_tag) == replica_tag, a
    # tautology on a str; the intent is to check the parsed name's string
    # form.
    assert str(replica_name) == replica_tag
Exemple #10
0
    def remote(self,
               request_data: Optional[Union[Dict, Any]] = None,
               **kwargs):
        """Issue an asynchronous request to the endpoint.

        Returns a Ray ObjectRef whose result can be waited for or retrieved
        using ray.wait or ray.get, respectively.

        Returns:
            ray.ObjectRef
        Args:
            request_data(dict, Any): If it's a dictionary, the data will be
                available in ``request.json()`` or ``request.form()``.
                Otherwise, it will be available in ``request.data``.
            ``**kwargs``: All keyword arguments will be available in
                ``request.args``.
        """
        metadata = RequestMetadata(
            get_random_letters(10),  # Random request id, used for debugging.
            self.endpoint_name,
            TaskContext.Python,
            call_method=self.method_name or "__call__",
            shard_key=self.shard_key,
            http_method=self.http_method or "GET",
            http_headers=self.http_headers or dict(),
        )
        return self.router_handle.enqueue_request.remote(
            metadata, request_data, **kwargs)
Exemple #11
0
def test_is_replica_name():
    """Only properly prefixed, well-formed names qualify as replica names."""
    suffix = get_random_letters()

    # Malformed (double '#') and unprefixed tags are rejected.
    assert not ReplicaName.is_replica_name(f"DeploymentA##{suffix}")
    assert not ReplicaName.is_replica_name(f"DeploymentA#{suffix}")
    # A tag carrying the expected prefix is accepted.
    assert ReplicaName.is_replica_name(
        f"{ReplicaName.prefix}DeploymentA#{suffix}")
Exemple #12
0
    async def _start_backend_replica(self, backend_tag):
        """Create, set up, and register a new replica for the backend.

        Args:
            backend_tag: Identifier of a registered backend.
        """
        assert (backend_tag in self.backend_table.list_backends()
                ), "Backend {} is not registered.".format(backend_tag)

        # Unique per-replica tag of the form "<backend>#<random suffix>".
        replica_tag = "{}#{}".format(backend_tag, get_random_letters(length=6))

        # Fetch the info to start the replica from the backend table.
        creator = self.backend_table.get_backend_creator(backend_tag)
        backend_config_dict = self.backend_table.get_info(backend_tag)
        backend_config = BackendConfig(**backend_config_dict)
        init_args = self.backend_table.get_init_args(backend_tag)
        kwargs = backend_config.get_actor_creation_args(init_args)

        runner_handle = creator(kwargs)
        self.tag_to_actor_handles[replica_tag] = runner_handle

        # Set up the worker.

        await runner_handle._ray_serve_setup.remote(backend_tag,
                                                    self.get_router()[0],
                                                    runner_handle)
        # NOTE(review): blocking ray.get inside an async method mixes sync
        # and async waiting — presumably acceptable here; confirm.
        ray.get(runner_handle._ray_serve_fetch.remote())

        # Register the worker in config tables and metric monitor.
        self.backend_table.add_replica(backend_tag, replica_tag)
        self.get_metric_monitor()[0].add_target.remote(runner_handle)
Exemple #13
0
    async def __call__(self, scope, receive, send):
        """Implements the ASGI protocol.

        Routes the incoming HTTP request to a Serve endpoint (or to a
        system handler for "/-/" paths) and sends the result back.

        See details at:
            https://asgi.readthedocs.io/en/latest/specs/index.html.
        """

        error_sender = self._make_error_sender(scope, receive, send)

        assert self.route_table is not None, (
            "Route table must be set via set_route_table.")
        assert scope["type"] == "http"
        current_path = scope["path"]

        # Count every request against its route for metrics.
        self.request_counter.record(1, tags={"route": current_path})

        # "/-/" paths are internal system endpoints, not user routes.
        if current_path.startswith("/-/"):
            await self._handle_system_request(scope, receive, send)
            return

        try:
            endpoint_name, methods_allowed = self.route_table[current_path]
        except KeyError:
            error_message = (
                "Path {} not found. "
                "Please ping http://.../-/routes for routing table"
            ).format(current_path)
            await error_sender(error_message, 404)
            return

        if scope["method"] not in methods_allowed:
            error_message = ("Methods {} not allowed. "
                             "Available HTTP methods are {}.").format(
                                 scope["method"], methods_allowed)
            await error_sender(error_message, 405)
            return

        http_body_bytes = await self.receive_http_body(scope, receive, send)

        # ASGI headers arrive as (bytes, bytes) pairs; decode for lookup.
        headers = {k.decode(): v.decode() for k, v in scope["headers"]}
        request_metadata = RequestMetadata(
            get_random_letters(10),  # Used for debugging.
            endpoint_name,
            TaskContext.Web,
            http_method=scope["method"].upper(),
            call_method=headers.get("X-SERVE-CALL-METHOD".lower(), "__call__"),
            shard_key=headers.get("X-SERVE-SHARD-KEY".lower(), None),
        )

        ref = await self.router.assign_request(request_metadata, scope,
                                               http_body_bytes)
        result = await ref

        # Surface worker-side exceptions as HTTP 500s.
        if isinstance(result, RayTaskError):
            error_message = "Task Error. Traceback: {}.".format(result)
            await error_sender(error_message, 500)
        else:
            await Response(result).send(scope, receive, send)
Exemple #14
0
async def test_shard_key(ray_instance, mock_controller,
                         task_runner_mock_actor):
    """Requests with the same shard key must map to the same backend."""
    q = ray.remote(Router).remote(mock_controller)
    await q.setup_in_async_loop.remote()

    # Spread traffic evenly over five mock backends.
    num_backends = 5
    traffic_dict = {}
    runners = [mock_task_runner() for _ in range(num_backends)]
    for i, runner in enumerate(runners):
        backend_name = "backend-split-" + str(i)
        traffic_dict[backend_name] = 1.0 / num_backends
        await mock_controller.add_new_replica.remote(backend_name, runner)
    await mock_controller.set_traffic.remote("svc",
                                             TrafficPolicy(traffic_dict))

    # Generate random shard keys and send one request for each.
    shard_keys = [get_random_letters() for _ in range(100)]
    for shard_key in shard_keys:
        await q.assign_request.remote(
            RequestMetadata(get_random_letters(10),
                            "svc",
                            None,
                            shard_key=shard_key), shard_key)

    # Log the shard keys that were assigned to each backend.
    runner_shard_keys = defaultdict(set)
    for i, runner in enumerate(runners):
        calls = await runner.get_all_calls.remote()
        for call in calls:
            runner_shard_keys[i].add(call.args[0])
        await runner.clear_calls.remote()

    # Send queries with the same shard keys a second time.
    for shard_key in shard_keys:
        await q.assign_request.remote(
            RequestMetadata(get_random_letters(10),
                            "svc",
                            None,
                            shard_key=shard_key), shard_key)

    # Check that the requests were all mapped to the same backends.
    for i, runner in enumerate(runners):
        calls = await runner.get_all_calls.remote()
        for call in calls:
            assert call.args[0] in runner_shard_keys[i]
Exemple #15
0
 def _remote(self, deployment_name, handle_options, args, kwargs) -> Coroutine:
     """Build request metadata and hand the call off to the router."""
     metadata = RequestMetadata(
         get_random_letters(10),  # Random request id, used for debugging.
         deployment_name,
         call_method=handle_options.method_name,
         http_arg_is_pickled=self._pickled_http_request,
     )
     return self.router.assign_request(metadata, *args, **kwargs)
Exemple #16
0
async def test_alter_backend(serve_instance, task_runner_mock_actor):
    """Requests follow the traffic policy as it is re-pointed."""
    router = ray.remote(Router).remote()
    await router.setup.remote("", serve_instance._controller_name)

    # Point traffic at each backend in turn and verify the request lands
    # on the newly targeted one.
    for request_num, backend in enumerate(
        ["backend-alter", "backend-alter-2"], start=1):
        await router.set_traffic.remote("svc", TrafficPolicy({backend: 1}))
        await router.add_new_worker.remote(backend, "replica-1",
                                           task_runner_mock_actor)
        await router.enqueue_request.remote(
            RequestMetadata(get_random_letters(10), "svc", None), request_num)
        call = await task_runner_mock_actor.get_recent_call.remote()
        assert call.args[0] == request_num
Exemple #17
0
def test_invalid_name_from_str():
    """from_str rejects malformed and unprefixed replica tags."""
    suffix = get_random_letters()

    # Double '#' separator is malformed.
    with pytest.raises(AssertionError):
        ReplicaName.from_str(f"DeploymentA##{suffix}")

    # Missing the ReplicaName prefix.
    with pytest.raises(AssertionError):
        ReplicaName.from_str(f"DeploymentA#{suffix}")
Exemple #18
0
 def _remote(self, endpoint_name, handle_options, request_data,
             kwargs) -> Coroutine:
     """Assemble request metadata and forward the call to the router."""
     metadata = RequestMetadata(
         get_random_letters(10),  # Random request id, used for debugging.
         endpoint_name,
         call_method=handle_options.method_name,
         shard_key=handle_options.shard_key,
         http_method=handle_options.http_method,
         http_headers=handle_options.http_headers,
     )
     return self.router.assign_request(metadata, request_data, **kwargs)
Exemple #19
0
 def _remote(self, request_data, kwargs) -> Coroutine:
     """Build Python-context request metadata and dispatch via the router."""
     metadata = RequestMetadata(
         get_random_letters(10),  # Random request id, used for debugging.
         self.endpoint_name,
         TaskContext.Python,
         call_method=self.method_name or "__call__",
         shard_key=self.shard_key,
         http_method=self.http_method or "GET",
         http_headers=self.http_headers or dict(),
     )
     return self.router.assign_request(metadata, request_data, **kwargs)
Exemple #20
0
 def __init__(self,
              code_version: Optional[str],
              user_config: Optional[Any] = None):
     """Record the (possibly auto-generated) code version and config hash."""
     if code_version is not None and not isinstance(code_version, str):
         raise TypeError(
             f"code_version must be str, got {type(code_version)}.")
     # A missing version marks this instance "unversioned" and gets a
     # random stand-in so hashing and comparison still work.
     self.unversioned = code_version is None
     self.code_version = (get_random_letters()
                          if code_version is None else code_version)
     self.user_config_hash = self._hash_user_config(user_config)
     self._hash = hash((self.code_version, self.user_config_hash))
Exemple #21
0
    def __init__(self, code_version: Optional[str], user_config: Optional[Any] = None):
        """Capture a code version plus a CRC32 hash of the user config.

        Args:
            code_version: Explicit version string, or None to auto-generate
                a random one (marking this version "unversioned").
            user_config: Arbitrary picklable user configuration.

        Raises:
            TypeError: if code_version is neither None nor a str.
        """
        if code_version is not None and not isinstance(code_version, str):
            raise TypeError(f"code_version must be str, got {type(code_version)}.")
        if code_version is None:
            self.unversioned = True
            self.code_version = get_random_letters()
        else:
            self.unversioned = False
            self.code_version = code_version

        self.user_config = user_config
        # TODO(simon): make this xlang compatible
        pickled_user_config = pickle.dumps(user_config)
        self.user_config_hash = crc32(pickled_user_config)
        # Combined hash covers both the config bytes and the version string.
        self._hash = crc32(pickled_user_config + self.code_version.encode("utf-8"))
Exemple #22
0
async def test_single_prod_cons_queue(serve_instance, task_runner_mock_actor):
    """A single producer/consumer pair round-trips one request."""
    router = ray.remote(Router).remote()
    await router.setup.remote("", serve_instance._controller_name)

    router.set_traffic.remote("svc",
                              TrafficPolicy({"backend-single-prod": 1.0}))
    router.add_new_worker.remote("backend-single-prod", "replica-1",
                                 task_runner_mock_actor)

    # The request result should come back once the worker handles it.
    assert await router.enqueue_request.remote(
        RequestMetadata(get_random_letters(10), "svc", None), 1) == "DONE"

    # And the worker must have seen exactly our positional payload.
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 1
    assert call.kwargs == {}
Exemple #23
0
async def test_simple_endpoint_backend_pair(ray_instance, mock_controller,
                                            task_runner_mock_actor):
    """One endpoint backed by one replica serves a request end to end."""
    router = ray.remote(Router).remote(mock_controller, "svc")

    # Propagate traffic config and register the lone replica.
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({"backend-single-prod": 1.0}))
    await mock_controller.add_new_replica.remote("backend-single-prod",
                                                 task_runner_mock_actor)

    # The assignment should resolve to the replica's result.
    ref = await router.assign_request.remote(
        RequestMetadata(get_random_letters(10), "svc"), 1)
    assert await ref == "DONE"

    # The replica received exactly our positional payload.
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 1
    assert call.kwargs == {}
Exemple #24
0
    def _set_backend_goal(self, backend_tag: BackendTag,
                          backend_info: Optional[BackendInfo]):
        """Set the target state for a backend, identified by tag.

        Args:
            backend_tag (BackendTag): Identifier of a backend.
            backend_info (Optional[BackendInfo]): Backend and replica
                config; None marks the backend as shutting down.

        Returns:
            Tuple of (new_goal_id, existing_goal_id): the goal created for
            this update and the goal it replaces (None if none existed).
            The previous ``-> None`` annotation was incorrect and was
            removed.
        """
        existing_goal_id = self._backend_goals.get(backend_tag)
        new_goal_id = self._goal_manager.create_goal()

        if backend_info is not None:
            self._backend_metadata[backend_tag] = backend_info
            self._target_replicas[
                backend_tag] = backend_info.backend_config.num_replicas

            # Unversioned backends get a random stand-in version string.
            if backend_info.version is not None:
                version = backend_info.version
            else:
                version = get_random_letters()
            self._target_versions[backend_tag] = version
        else:
            # Shutting down: target zero replicas.
            self._target_replicas[backend_tag] = 0

        self._backend_goals[backend_tag] = new_goal_id

        return new_goal_id, existing_goal_id
Exemple #25
0
    def _scale_replicas(self, backend_tag, num_replicas):
        """Scale the given backend to the number of replicas.

        NOTE: this does not actually start or stop the replicas, but instead
        adds the intention to start/stop them to self.workers_to_start and
        self.workers_to_stop. The caller is responsible for then first writing
        a checkpoint and then actually starting/stopping the intended replicas.
        This avoids inconsistencies with starting/stopping a worker and then
        crashing before writing a checkpoint.

        Args:
            backend_tag: The backend to scale.
            num_replicas: Target replica count (must be >= 0).
        """
        logger.debug("Scaling backend '{}' to {} replicas".format(
            backend_tag, num_replicas))
        assert (backend_tag in self.backends
                ), "Backend {} is not registered.".format(backend_tag)
        assert num_replicas >= 0, ("Number of replicas must be"
                                   " greater than or equal to 0.")

        current_num_replicas = len(self.replicas[backend_tag])
        delta_num_replicas = num_replicas - current_num_replicas

        if delta_num_replicas > 0:
            logger.debug("Adding {} replicas to backend {}".format(
                delta_num_replicas, backend_tag))
            for _ in range(delta_num_replicas):
                replica_tag = "{}#{}".format(backend_tag, get_random_letters())
                self.replicas_to_start[backend_tag].append(replica_tag)

        elif delta_num_replicas < 0:
            logger.debug("Removing {} replicas from backend {}".format(
                -delta_num_replicas, backend_tag))
            # BUG FIX: delta_num_replicas is negative on this branch, so the
            # original comparison (>= delta_num_replicas) was vacuously true.
            # Compare against the positive removal count instead.
            assert len(self.replicas[backend_tag]) >= -delta_num_replicas
            for _ in range(-delta_num_replicas):
                replica_tag = self.replicas[backend_tag].pop()
                if len(self.replicas[backend_tag]) == 0:
                    del self.replicas[backend_tag]
                del self.workers[backend_tag][replica_tag]
                if len(self.workers[backend_tag]) == 0:
                    del self.workers[backend_tag]

                self.replicas_to_stop[backend_tag].append(replica_tag)
Exemple #26
0
def test_shard_key(serve_instance, route):
    """Same shard key must route to the same backend across requests."""
    client = serve_instance

    # Create five backends that return different integers.
    num_backends = 5
    traffic_dict = {}
    for i in range(num_backends):

        def function(_):
            # NOTE(review): `i` is captured from the enclosing loop; this
            # presumably relies on create_backend() serializing the closure
            # each iteration so every backend keeps its own `i` — confirm,
            # since plain late binding would make all of them return the
            # final value.
            return i

        backend_name = "backend-split-" + str(i)
        traffic_dict[backend_name] = 1.0 / num_backends
        client.create_backend(backend_name, function)

    client.create_endpoint("endpoint",
                           backend=list(traffic_dict.keys())[0],
                           route=route)
    client.set_traffic("endpoint", traffic_dict)

    def do_request(shard_key):
        # HTTP path when a route is configured, handle path otherwise.
        if route is not None:
            url = "http://127.0.0.1:8000" + route
            headers = {"X-SERVE-SHARD-KEY": shard_key}
            result = requests.get(url, headers=headers).text
        else:
            handle = client.get_handle("endpoint").options(shard_key=shard_key)
            result = ray.get(handle.options(shard_key=shard_key).remote())
        return result

    # Send requests with different shard keys and log the backends they go to.
    shard_keys = [get_random_letters() for _ in range(20)]
    results = {}
    for shard_key in shard_keys:
        results[shard_key] = do_request(shard_key)

    # Check that the shard keys are mapped to the same backends.
    for shard_key in shard_keys:
        assert do_request(shard_key) == results[shard_key]
Exemple #27
0
def start(
    detached: bool = False,
    http_options: Optional[Union[dict, HTTPOptions]] = None,
    dedicated_cpu: bool = False,
    _checkpoint_path: str = DEFAULT_CHECKPOINT_PATH,
    **kwargs,
) -> ServeControllerClient:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or when the script exits). If detached is set to True, the
    instance will instead persist until serve.shutdown() is called. This is
    only relevant if connecting to a long-running Ray cluster (e.g., with
    ray.init(address="auto") or ray.init("ray://<remote_addr>")).

    Args:
        detached: Whether or not the instance should be detached from this
          script. If set, the instance will live on the Ray cluster until it is
          explicitly stopped with serve.shutdown().
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration options
          for HTTP proxy. You can pass in a dictionary or HTTPOptions object
          with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults to
              "127.0.0.1". To expose Serve publicly, you probably want to set
              this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - root_path(str): Root path to mount the serve application
              (for example, "/serve"). All deployment routes will be prefixed
              with this path. Defaults to "".
            - middlewares(list): A list of Starlette middlewares that will be
              applied to the HTTP servers in the cluster. Defaults to [].
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed serve.start
                  on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.
            - num_cpus (int): The number of CPU cores to reserve for each
              internal Serve HTTP proxy actor.  Defaults to 0.
        dedicated_cpu: Whether to reserve a CPU core for the internal
          Serve controller actor.  Defaults to False.

    Returns:
        ServeControllerClient: client connected to the new (or already
        running) Serve instance.
    """
    usage_lib.record_library_usage("serve")

    # Legacy HTTP kwargs were folded into http_options; reject them early.
    http_deprecated_args = ["http_host", "http_port", "http_middlewares"]
    for key in http_deprecated_args:
        if key in kwargs:
            raise ValueError(
                f"{key} is deprecated, please use serve.start(http_options="
                f'{{"{key}": {kwargs[key]}}}) instead.')
    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    # Reuse an existing healthy Serve instance when one is running.
    try:
        client = get_global_client(_health_check_controller=True)
        logger.info(
            f'Connecting to existing Serve app in namespace "{SERVE_NAMESPACE}".'
        )

        _check_http_and_checkpoint_options(client, http_options,
                                           _checkpoint_path)
        return client
    except RayServeException:
        pass

    # Detached instances share a fixed controller name; anonymous ones get
    # a randomized name so multiple instances don't collide.
    if detached:
        controller_name = SERVE_CONTROLLER_NAME
    else:
        controller_name = format_actor_name(get_random_letters(),
                                            SERVE_CONTROLLER_NAME)

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions()

    controller = ServeController.options(
        num_cpus=1 if dedicated_cpu else 0,
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={
            get_current_node_resource_key(): 0.01
        },
        namespace=SERVE_NAMESPACE,
        max_concurrency=CONTROLLER_MAX_CONCURRENCY,
    ).remote(
        controller_name,
        http_options,
        _checkpoint_path,
        detached=detached,
    )

    # Block until every HTTP proxy actor is serving (or time out).
    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if len(proxy_handles) > 0:
        try:
            ray.get(
                [handle.ready.remote() for handle in proxy_handles.values()],
                timeout=HTTP_PROXY_TIMEOUT,
            )
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                f"HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s.")

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=detached,
    )
    set_global_client(client)
    logger.info(f"Started{' detached ' if detached else ' '}Serve instance in "
                f'namespace "{SERVE_NAMESPACE}".')
    return client
Exemple #28
0
def start(detached: bool = False,
          http_host: str = DEFAULT_HTTP_HOST,
          http_port: int = DEFAULT_HTTP_PORT,
          http_middlewares: List[Any] = None) -> Client:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or when the script exits). If detached is set to True, the
    instance will instead persist until client.shutdown() is called and clients
    to it can be connected using serve.connect(). This is only relevant if
    connecting to a long-running Ray cluster (e.g., with address="auto").

    Args:
        detached (bool): Whether or not the instance should be detached from
            this script.
        http_host (str): Host for HTTP servers to listen on. Defaults to
            "127.0.0.1". To expose Serve publicly, you probably want to set
            this to "0.0.0.0". One HTTP server will be started on each node in
            the Ray cluster.
        http_port (int): Port for HTTP server. Defaults to 8000.
        http_middlewares (list): A list of Starlette middlewares that will be
            applied to the HTTP servers in the cluster. Defaults to an empty
            list.

    Raises:
        RayServeException: if detached=True and a detached instance is
            already running.
    """
    # Use a None sentinel instead of a mutable default argument: a shared
    # `[]` default would be reused (and could be mutated) across calls.
    if http_middlewares is None:
        http_middlewares = []

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init()

    # Try to get serve controller if it exists.
    if detached:
        # Detached instances use a fixed, well-known actor name so that
        # serve.connect() can find the controller later.
        controller_name = SERVE_CONTROLLER_NAME
        try:
            ray.get_actor(controller_name)
            raise RayServeException("Called serve.start(detached=True) but a "
                                    "detached instance is already running. "
                                    "Please use serve.connect() to connect to "
                                    "the running instance instead.")
        except ValueError:
            # No actor with that name exists yet -- safe to start one.
            pass
    else:
        # Randomize the name so multiple non-detached instances can coexist.
        controller_name = format_actor_name(SERVE_CONTROLLER_NAME,
                                            get_random_letters())

    controller = ServeController.options(
        name=controller_name,
        lifetime="detached" if detached else None,
        # Let Ray restart/retry the controller indefinitely on failure.
        max_restarts=-1,
        max_task_retries=-1,
    ).remote(controller_name,
             http_host,
             http_port,
             http_middlewares,
             detached=detached)

    # Block until the HTTP proxy on every node is serving routes.
    futures = []
    for node_id in ray.state.node_ids():
        future = block_until_http_ready.options(
            num_cpus=0, resources={
                node_id: 0.01
            }).remote("http://{}:{}/-/routes".format(http_host, http_port),
                      timeout=HTTP_PROXY_TIMEOUT)
        futures.append(future)
    ray.get(futures)

    return Client(controller, controller_name, detached=detached)
Exemple #29
0
    def _scale_backend_replicas(
        self,
        backend_tag: BackendTag,
        target_replicas: int,
        target_version: str,
    ) -> bool:
        """Record the intent to scale the backend to target_replicas.

        NOTE: this does not actually start or stop the replicas, but instead
        adds them to ReplicaState.SHOULD_START or ReplicaState.SHOULD_STOP.
        The caller is responsible for then first writing a checkpoint and then
        actually starting/stopping the intended replicas. This avoids
        inconsistencies with starting/stopping a replica and then crashing
        before writing a checkpoint.

        Returns True iff any replica was scheduled to start or stop.
        """
        assert (backend_tag in self._backend_metadata
                ), "Backend {} is not registered.".format(backend_tag)
        assert target_replicas >= 0, ("Number of replicas must be"
                                      " greater than or equal to 0.")

        info: BackendInfo = self._backend_metadata[backend_tag]
        shutdown_timeout_s = (
            info.backend_config.experimental_graceful_shutdown_timeout_s)

        replicas = self._replicas[backend_tag]

        # Retire replicas running an outdated version before counting.
        num_stopped = self._stop_wrong_version_replicas(
            replicas, target_replicas, target_version, shutdown_timeout_s)
        if num_stopped > 0:
            logger.info(f"Stopping {num_stopped} replicas of backend "
                        f"'{backend_tag}' with outdated versions.")

        # States that count toward (or can satisfy) the target count.
        alive_states = [
            ReplicaState.SHOULD_START, ReplicaState.STARTING,
            ReplicaState.RUNNING
        ]
        delta = target_replicas - replicas.count(states=alive_states)
        if delta == 0:
            return False

        if delta > 0:
            # Replicas already winding down may still hold resources, so
            # discount them to avoid ever exceeding target_replicas.
            winding_down = replicas.count(states=[
                ReplicaState.SHOULD_STOP,
                ReplicaState.STOPPING,
            ])
            to_add = max(delta - winding_down, 0)
            if to_add > 0:
                logger.info(f"Adding {to_add} replicas "
                            f"to backend '{backend_tag}'.")
            for _ in range(to_add):
                replica_tag = "{}#{}".format(backend_tag, get_random_letters())
                replicas.add(
                    ReplicaState.SHOULD_START,
                    BackendReplica(self._controller_name, self._detached,
                                   replica_tag, backend_tag, target_version))
        else:
            to_remove = -delta
            logger.info(f"Removing {to_remove} replicas "
                        f"from backend '{backend_tag}'.")
            for replica in replicas.pop(
                    states=alive_states, max_replicas=to_remove):
                # Mark for graceful shutdown; the caller performs the actual
                # stop after checkpointing.
                replica.set_should_stop(shutdown_timeout_s)
                replicas.add(ReplicaState.SHOULD_STOP, replica)

        return True
Exemple #30
0
def make_request_param(call_method="__call__"):
    """Build a RequestMetadata aimed at "endpoint" with a fresh random
    request id, the Python task context, and the given call method."""
    request_id = get_random_letters(10)
    return RequestMetadata(
        request_id,
        "endpoint",
        context.TaskContext.Python,
        call_method=call_method,
    )