Example #1
    def remote(self, *args, **kwargs):
        if len(args) != 0:
            raise RayServeException(
                "handle.remote must be invoked with keyword arguments.")

        method_name = self.method_name
        if method_name is None:
            method_name = "__call__"

        # create RequestMetadata instance
        request_in_object = RequestMetadata(
            self.endpoint_name,
            TaskContext.Python,
            self.relative_slo_ms,
            self.absolute_slo_ms,
            call_method=method_name,
        )
        return self.router_handle.enqueue_request.remote(
            request_in_object, **kwargs)
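A minimal usage sketch of the keyword-only rule above, assuming the legacy serve.get_handle API from this snippet's era; the endpoint name and the `data=` keyword are hypothetical.

# Sketch only: legacy handle API; "my_endpoint" and `data=` are made up.
from ray import serve

handle = serve.get_handle("my_endpoint")

ref = handle.remote(data={"x": 1})   # OK: keyword arguments only
# handle.remote({"x": 1})            # would raise RayServeException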
Example #2
    def get_runner_method(self, request_item: Query) -> Callable:
        method_name = request_item.metadata.call_method
        if not hasattr(self.callable, method_name):
            # Filter to methods that don't start with '__' prefix.
            def callable_method_filter(attr):
                if attr.startswith("__"):
                    return False
                elif not callable(getattr(self.callable, attr)):
                    return False

                return True

            methods = list(filter(callable_method_filter, dir(self.callable)))
            raise RayServeException(f"Tried to call a method '{method_name}' "
                                    "that does not exist. Available methods: "
                                    f"{methods}.")
        if self.is_function:
            return self.callable
        return getattr(self.callable, method_name)
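The same getattr-based dispatch can be shown in isolation; a plain-Python sketch (the Greeter class and lookup_method helper are hypothetical, not part of Ray Serve).

class Greeter:
    def hello(self, name):
        return f"Hello, {name}!"

def lookup_method(instance, method_name="__call__"):
    # Mirror the lookup above: fail early and list the available methods.
    if not hasattr(instance, method_name):
        available = [a for a in dir(instance)
                     if not a.startswith("__")
                     and callable(getattr(instance, a))]
        raise AttributeError(
            f"No method '{method_name}'. Available methods: {available}.")
    return getattr(instance, method_name)

print(lookup_method(Greeter(), "hello")("Ray"))  # Hello, Ray!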
Example #3
 async def reconfigure(self, user_config: Any):
     async with self.rwlock.writer_lock:
         self.user_config = user_config
         self.version = DeploymentVersion(
             self.version.code_version, user_config=user_config
         )
         if self.is_function:
             raise ValueError("deployment_def must be a class to use user_config")
         elif not hasattr(self.callable, RECONFIGURE_METHOD):
             raise RayServeException(
                 "user_config specified but deployment "
                 + self.deployment_name
                 + " missing "
                 + RECONFIGURE_METHOD
                 + " method"
             )
         reconfigure_method = sync_to_async(
             getattr(self.callable, RECONFIGURE_METHOD)
         )
         await reconfigure_method(user_config)
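On the user side, this hook pairs with a user_config on the deployment; a minimal sketch against the Ray Serve 2.x decorator API (the "threshold" key is a made-up example).

from ray import serve

@serve.deployment(user_config={"threshold": 0.5})
class Model:
    def __init__(self):
        self.threshold = None

    # Called once at startup and again whenever user_config is updated,
    # which is what the writer-locked reconfigure() above drives.
    def reconfigure(self, config: dict):
        self.threshold = config["threshold"]

    def __call__(self, request) -> dict:
        return {"threshold": self.threshold}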
Example #4
def get_replica_context() -> ReplicaContext:
    """When called from a backend, returns the backend tag and replica tag.

    When not called from a backend, raises a RayServeException.

    A replica tag uniquely identifies a single replica for a Ray Serve
    backend at runtime.  Replica tags are of the form
    `<backend tag>#<random letters>`.

    Raises:
        RayServeException: if not called from within a Ray Serve backend
    Example:
        >>> serve.get_replica_context().backend_tag # my_backend
        >>> serve.get_replica_context().replica_tag # my_backend#krcwoa
    """
    if _INTERNAL_REPLICA_CONTEXT is None:
        raise RayServeException("`serve.get_replica_context()` "
                                "may only be called from within a "
                                "Ray Serve backend.")
    return _INTERNAL_REPLICA_CONTEXT
Example #5
def get_replica_context() -> ReplicaContext:
    """If called from a deployment, returns the deployment and replica tag.

    A replica tag uniquely identifies a single replica for a Ray Serve
    deployment at runtime.  Replica tags are of the form
    `<deployment_name>#<random letters>`.

    Raises:
        RayServeException: if not called from within a Ray Serve deployment.
    Example:
        >>> serve.get_replica_context().deployment # deployment_name
        >>> serve.get_replica_context().replica_tag # deployment_name#krcwoa
    """
    if _INTERNAL_REPLICA_CONTEXT is None:
        raise RayServeException(
            "`serve.get_replica_context()` "
            "may only be called from within a "
            "Ray Serve deployment."
        )
    return _INTERNAL_REPLICA_CONTEXT
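A usage sketch inside a deployment, assuming the 2.x decorator API; the Echo class is hypothetical.

from ray import serve

@serve.deployment
class Echo:
    def __call__(self, request) -> dict:
        ctx = serve.get_replica_context()
        # The replica tag embeds the deployment name, e.g. "Echo#krcwoa".
        return {"deployment": ctx.deployment, "replica": ctx.replica_tag}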
Example #6
    async def assign_request(
            self,
            request_meta: RequestMetadata,
            *request_args,
            **request_kwargs,
    ):
        """Assign a query and returns an object ref represent the result"""
        endpoint = request_meta.endpoint
        query = Query(
            args=list(request_args),
            kwargs=request_kwargs,
            metadata=request_meta,
        )

        if endpoint not in self.endpoint_policies:
            logger.info(
                f"Endpoint {endpoint} doesn't exist, waiting for registration."
            )
            future = asyncio.get_event_loop().create_future()
            if endpoint not in self._pending_endpoints:
                self._pending_endpoints[endpoint] = future
            endpoint_status = await self._pending_endpoints[endpoint]
            if endpoint_status == _PendingEndpointFound.REMOVED:
                raise RayServeException(
                    f"Endpoint {endpoint} was removed. This request "
                    "cannot be completed.")
            logger.info(f"Endpoint {endpoint} registered.")

        endpoint_policy = self.endpoint_policies[endpoint]
        chosen_backend, *shadow_backends = endpoint_policy.assign(query)

        result_ref = await self._get_or_create_replica_set(
            chosen_backend).assign_replica(query)
        for backend in shadow_backends:
            (await self._get_or_create_replica_set(backend)
             .assign_replica(query))

        self.num_router_requests.inc(tags={"endpoint": endpoint})

        return result_ref
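The pending-endpoint handling above is a generic asyncio pattern: park the request on a Future and resolve it once registration (or removal) arrives. A stripped-down sketch of that pattern, independent of Ray Serve:

import asyncio

pending = {}  # endpoint name -> Future resolved on registration/removal

async def wait_for_endpoint(name: str):
    if name not in pending:
        pending[name] = asyncio.get_running_loop().create_future()
    status = await pending[name]  # parked here until someone resolves it
    if status == "removed":
        raise RuntimeError(f"Endpoint {name} was removed.")

async def main():
    waiter = asyncio.ensure_future(wait_for_endpoint("svc"))
    await asyncio.sleep(0)                    # let the waiter park
    pending["svc"].set_result("registered")   # registration arrives
    await waiter                              # completes normally

asyncio.run(main())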
Example #7
File: utils.py Project: stoep/ray
def get_conda_env_dir(env_name):
    """Given a environment name like `tf1`, find and validate the
    corresponding conda directory. Untested on Windows.
    """
    conda_prefix = os.environ.get("CONDA_PREFIX")
    if conda_prefix is None:
        # The caller is neither in a conda env nor in (base).  This is rare
        # because by default, new terminals start in (base), but we can still
        # support this case.
        conda_exe = os.environ.get("CONDA_EXE")
        if conda_exe is None:
            raise RayServeException(
                "Ray Serve cannot find environment variables set by conda. "
                "Please verify conda is installed.")
        # Example: CONDA_EXE=$HOME/anaconda3/bin/python
        # Strip out the /bin/python by going up two parent directories.
        conda_prefix = str(Path(conda_exe).parent.parent)

    # There are two cases:
    # 1. We are in conda base env: CONDA_DEFAULT_ENV=base and
    #    CONDA_PREFIX=$HOME/anaconda3
    # 2. We are in user created conda env: CONDA_DEFAULT_ENV=$env_name and
    #    CONDA_PREFIX=$HOME/anaconda3/envs/$env_name
    if os.environ.get("CONDA_DEFAULT_ENV") == "base":
        # Caller is running in base conda env.
        # Not recommended by conda, but we can still try to support it.
        env_dir = os.path.join(conda_prefix, "envs", env_name)
    else:
        # Now `conda_prefix` should be something like
        # $HOME/anaconda3/envs/$env_name
        # We want to strip the $env_name component.
        conda_envs_dir = os.path.split(conda_prefix)[0]
        env_dir = os.path.join(conda_envs_dir, env_name)
    if not os.path.isdir(env_dir):
        raise ValueError(
            "conda env " + env_name +
            " not found in conda envs directory. Run `conda env list` to " +
            "verify the name is correct.")
    return env_dir
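A hypothetical usage of the helper above; the resolved path depends entirely on the local conda installation, and a ValueError is raised if no env with that name exists.

env_dir = get_conda_env_dir("tf1")
print(env_dir)  # e.g. /home/user/anaconda3/envs/tf1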
Example #8
def _connect() -> Client:
    """Connect to an existing Serve instance on this Ray cluster.

    If calling from the driver program, the Serve instance on this Ray cluster
    must first have been initialized using `serve.start(detached=True)`.

    If called from within a replica, this will connect to the same Serve
    instance that the replica is running in.
    """

    # Initialize ray if needed.
    ray.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace="serve")

    # When running inside of a replica, _INTERNAL_REPLICA_CONTEXT is set to
    # ensure that the correct instance is connected to.
    if _INTERNAL_REPLICA_CONTEXT is None:
        controller_name = SERVE_CONTROLLER_NAME
        controller_namespace = _get_controller_namespace(detached=True)
    else:
        controller_name = _INTERNAL_REPLICA_CONTEXT._internal_controller_name
        controller_namespace = _get_controller_namespace(detached=False)

    # Try to get serve controller if it exists
    try:
        controller = ray.get_actor(controller_name, namespace=controller_namespace)
    except ValueError:
        raise RayServeException(
            "There is no Serve instance running on this Ray cluster. "
            "Please call `serve.start(detached=True)` to start one."
        )

    client = Client(controller, controller_name, detached=True)
    _set_global_client(client)
    return client
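The intended workflow from the docstring, as a sketch across two separate scripts; the cluster address is an example value.

import ray
from ray import serve

# Script 1: start a long-lived, detached Serve instance on the cluster.
ray.init(address="auto", namespace="serve")
serve.start(detached=True)

# Script 2 (run later, in a new process): reconnecting resolves the same
# controller actor, which is what _connect() does under the hood.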
Example #9
    async def assign_request(
        self,
        request_meta: RequestMetadata,
        *request_args,
        **request_kwargs,
    ):
        """Assign a query and returns an object ref represent the result"""
        endpoint = request_meta.endpoint
        query = Query(
            args=list(request_args),
            kwargs=request_kwargs,
            metadata=request_meta,
        )

        if not self._pending_endpoint_registered.is_set():
            # This can happen when the router is created but the endpoint
            # information hasn't been retrieved via long-poll yet.
            try:
                await asyncio.wait_for(
                    self._pending_endpoint_registered.wait(),
                    timeout=5,
                )
            except asyncio.TimeoutError:
                raise RayServeException(
                    f"Endpoint {endpoint} doesn't exist after 5s timeout. "
                    "Marking the query failed.")

        chosen_backend, *shadow_backends = self.endpoint_policy.assign(query)

        result_ref = await self._get_or_create_replica_set(
            chosen_backend).assign_replica(query)
        for backend in shadow_backends:
            (await
             self._get_or_create_replica_set(backend).assign_replica(query))

        self.num_router_requests.inc(tags={"endpoint": endpoint})

        return result_ref
Example #10
def get_replica_context() -> ReplicaContext:
    """If called from a deployment, returns the deployment and replica tag.

    A replica tag uniquely identifies a single replica for a Ray Serve
    deployment at runtime.  Replica tags are of the form
    `<deployment_name>#<random letters>`.

    Raises:
        RayServeException: if not called from within a Ray Serve deployment.

    Example:
        >>> from ray import serve
        >>> # deployment_name
        >>> serve.get_replica_context().deployment # doctest: +SKIP
        >>> # deployment_name#krcwoa
        >>> serve.get_replica_context().replica_tag # doctest: +SKIP
    """
    internal_replica_context = get_internal_replica_context()
    if internal_replica_context is None:
        raise RayServeException("`serve.get_replica_context()` "
                                "may only be called from within a "
                                "Ray Serve deployment.")
    return internal_replica_context
Example #11
 def check(self, *args, **kwargs):
     if self._shutdown:
         raise RayServeException("Client has already been shut down.")
     return f(self, *args, **kwargs)
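Example #11 is the inner function of a guard decorator; the enclosing pattern typically looks like this (a generic sketch, not Ray Serve's exact code).

import functools

def ensure_not_shutdown(f):
    @functools.wraps(f)
    def check(self, *args, **kwargs):
        # Reject calls on a client that has already been shut down.
        if self._shutdown:
            raise RuntimeError("Client has already been shut down.")
        return f(self, *args, **kwargs)
    return check

class Client:
    def __init__(self):
        self._shutdown = False

    @ensure_not_shutdown
    def ping(self):
        return "pong"

c = Client()
print(c.ping())   # "pong"
c._shutdown = True
# c.ping()        # would raise RuntimeError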
Example #12
def create_backend(func_or_class,
                   backend_tag,
                   *actor_init_args,
                   backend_config=BackendConfig()):
    """Create a backend using func_or_class and assign backend_tag.

    Args:
        func_or_class (callable, class): a function or a class implementing
            the __call__ protocol.
        backend_tag (str): a unique tag assigned to this backend. It will be
            used to associate services in the traffic policy.
        backend_config (BackendConfig): an object defining backend properties
            for starting a backend.
        *actor_init_args (optional): the arguments to pass to the class
            initialization method.
    """
    assert isinstance(backend_config,
                      BackendConfig), ("backend_config must be"
                                       " an instance of BackendConfig")
    backend_config_dict = dict(backend_config)

    should_accept_batch = backend_config.max_batch_size is not None
    batch_annotation_not_found = RayServeException(
        "max_batch_size is set in config but the function or method does not "
        "accept batching. Please use @serve.accept_batch to explicitly mark "
        "the function or method as batchable; it should take lists as "
        "arguments.")

    arg_list = []
    if inspect.isfunction(func_or_class):
        if should_accept_batch and not hasattr(func_or_class,
                                               "serve_accept_batch"):
            raise batch_annotation_not_found

        # arg list for a fn is function itself
        arg_list = [func_or_class]
        # ignore lint on lambda expression
        creator = lambda kwargs: TaskRunnerActor._remote(**kwargs)  # noqa: E731
    elif inspect.isclass(func_or_class):
        if should_accept_batch and not hasattr(func_or_class.__call__,
                                               "serve_accept_batch"):
            raise batch_annotation_not_found

        # Python inheritance order is right-to-left. We put RayServeMixin
        # on the left to make sure its methods are not overridden.
        @ray.remote
        class CustomActor(RayServeMixin, func_or_class):
            pass

        arg_list = actor_init_args
        # ignore lint on lambda expression
        creator = lambda kwargs: CustomActor._remote(**kwargs)  # noqa: E731
    else:
        raise TypeError(
            "Backend must be a function or class, it is {}.".format(
                type(func_or_class)))

    # save creator which starts replicas
    global_state.backend_table.register_backend(backend_tag, creator)

    # save information about configurations needed to start the replicas
    global_state.backend_table.register_info(backend_tag, backend_config_dict)

    # save the initial arguments needed by replicas
    global_state.backend_table.save_init_args(backend_tag, arg_list)

    # set the backend config inside the router
    # particularly for max-batch-size
    ray.get(global_state.init_or_get_router().set_backend_config.remote(
        backend_tag, backend_config_dict))
    scale(backend_tag, backend_config_dict["num_replicas"])
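A usage sketch consistent with this snippet's own signature; this pre-1.0 API no longer exists in current Ray Serve, and the names are hypothetical.

# Function backend: the flask request is the single argument.
def echo(flask_request):
    return flask_request.args.get("text", "")

create_backend(echo, "echo:v1", backend_config=BackendConfig())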
Example #13
 def check(*args, **kwargs):
     if _get_master_actor() is None:
         raise RayServeException("Please run serve.init to initialize or "
                                 "connect to existing ray serve cluster.")
     return f(*args, **kwargs)
Example #14
 def __reduce__(self):
     raise RayServeException("Ray Serve client cannot be serialized.")
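Raising from __reduce__ is the standard way to make an object refuse pickling; a self-contained demonstration of the pattern (the Unpicklable class is illustrative only).

import pickle

class Unpicklable:
    def __reduce__(self):
        # pickle.dumps calls __reduce__; raising here blocks serialization.
        raise TypeError("This object cannot be serialized.")

try:
    pickle.dumps(Unpicklable())
except TypeError as e:
    print(e)  # This object cannot be serialized.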
Example #15
    def invoke_batch(self, request_item_list):
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch
        # have the same serve context.

        # For batching kwargs are modified as follows -
        # kwargs [Python Context] : key,val
        # kwargs_list             : key, [val1,val2, ... , valn]
        # or
        # args[Web Context]       : val
        # args_list               : [val1,val2, ...... , valn]
        # where n (current batch size) <= max_batch_size of a backend

        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            if is_web_context:
                # Web context only has one positional argument:
                # the flask request.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # Python context only has kwargs.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a fake flask request to conform with
                # batching semantics: when in batching mode,
                # each argument is turned into a list.
                arg_list.append(FakeFlaskRequest())

        try:
            # Check for mixed query contexts; a batch
            # needs a unified context.
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")

            serve_context.web = context_flags.pop()
            serve_context.batch_size = batch_size
            # Flask requests are passed to __call__ as a list
            arg_list = [arg_list]

            start_timestamp = time.time()
            result_list = self.__call__(*arg_list, **kwargs_list)

            self._serve_metric_latency_list.append(time.time() -
                                                   start_timestamp)
            if (not isinstance(result_list,
                               list)) or (len(result_list) != batch_size):
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch-size. "
                                        "Please return a list of result "
                                        "with length equals to the batch "
                                        "size.")
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self._serve_metric_error_counter += batch_size
            return [wrapped_exception for _ in range(batch_size)]
Example #16
 async def form(self):
     """The request dictionary, from ``handle.remote(dict)``."""
     if not isinstance(self._data, dict):
         raise RayServeException("Request data is not a dictionary. "
                                 f"It is {type(self._data)}.")
     return self._data
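Per the docstring, a dict passed to handle.remote() surfaces through form(); a sketch of the backend side, assuming this era's ServeRequest semantics (the "text" field is hypothetical).

async def backend(request):
    data = await request.form()  # raises RayServeException for non-dicts
    return data["text"]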
Example #17
def start(
        detached: bool = False,
        http_host: Optional[str] = DEFAULT_HTTP_HOST,
        http_port: int = DEFAULT_HTTP_PORT,
        http_middlewares: List[Any] = [],
        http_options: Optional[Union[dict, HTTPOptions]] = None,
) -> Client:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or until the script exits). If detached is set to True, the
    instance will instead persist until client.shutdown() is called, and
    clients can connect to it using serve.connect(). This is only relevant if
    connecting to a long-running Ray cluster (e.g., with address="auto").

    Args:
        detached (bool): Whether or not the instance should be detached from
          this script.
        http_host (Optional[str]): Deprecated, use http_options instead.
        http_port (int): Deprecated, use http_options instead.
        http_middlewares (list): Deprecated, use http_options instead.
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration options
          for HTTP proxy. You can pass in a dictionary or HTTPOptions object
          with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults to
              "127.0.0.1". To expose Serve publicly, you probably want to set
              this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - middlewares(list): A list of Starlette middlewares that will be
              applied to the HTTP servers in the cluster.
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed serve.start
                  on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.
    """
    if ((http_host != DEFAULT_HTTP_HOST) or (http_port != DEFAULT_HTTP_PORT)
            or (len(http_middlewares) != 0)):
        if http_options is not None:
            raise ValueError(
                "You cannot specify both `http_options` and any of the "
                "`http_host`, `http_port`, and `http_middlewares` arguments. "
                "`http_options` is preferred.")
        else:
            warn(
                "`http_host`, `http_port`, `http_middlewares` are deprecated. "
                "Please use serve.start(http_options={'host': ..., "
                "'port': ..., middlewares': ...}) instead.",
                DeprecationWarning,
            )

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init()

    register_custom_serializers()

    # Try to get serve controller if it exists
    if detached:
        controller_name = SERVE_CONTROLLER_NAME
        try:
            ray.get_actor(controller_name)
            raise RayServeException("Called serve.start(detached=True) but a "
                                    "detached instance is already running. "
                                    "Please use serve.connect() to connect to "
                                    "the running instance instead.")
        except ValueError:
            pass
    else:
        controller_name = format_actor_name(SERVE_CONTROLLER_NAME,
                                            get_random_letters())

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions(
            host=http_host, port=http_port, middlewares=http_middlewares)

    controller = ServeController.options(
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={
            get_current_node_resource_key(): 0.01
        },
    ).remote(
        controller_name,
        http_options,
        detached=detached,
    )

    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if len(proxy_handles) > 0:
        try:
            ray.get(
                [handle.ready.remote() for handle in proxy_handles.values()],
                timeout=HTTP_PROXY_TIMEOUT,
            )
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                "HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s.")

    client = Client(controller, controller_name, detached=detached)
    _set_global_client(client)
    return client
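A usage sketch of the preferred http_options form from the docstring; the values shown are examples.

from ray import serve

client = serve.start(
    detached=True,
    http_options={"host": "0.0.0.0", "port": 8000, "location": "HeadOnly"},
)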
Example #18
def make_fastapi_class_based_view(fastapi_app, cls: Type) -> None:
    """Transform the `cls`'s methods and class annotations to FastAPI routes.

    Modified from
    https://github.com/dmontagu/fastapi-utils/blob/master/fastapi_utils/cbv.py

    Usage:
    >>> app = FastAPI()
    >>> class A:
    ...     @app.route("/{i}")
    ...     def func(self, i: int) -> str:
    ...         return self.dep + i
    >>> # just running the app won't work here.
    >>> make_fastapi_class_based_view(app, A)
    >>> # now app can be run properly
    """
    # Delayed import to prevent circular imports in workers.
    from fastapi import Depends, APIRouter
    from fastapi.routing import APIRoute

    def get_current_servable_instance():
        from ray import serve
        return serve.get_replica_context().servable_object

    # Find all the class method routes
    class_method_routes = [
        route for route in fastapi_app.routes if
        # User defined routes must all be APIRoute.
        isinstance(route, APIRoute)
        # We want to find the route that's bound to the `cls`.
        # NOTE(simon): we can't use `route.endpoint in inspect.getmembers(cls)`
        # because FastAPI supports different routes for methods with the
        # same name. See #17559.
        and (cls.__qualname__ in route.endpoint.__qualname__)
    ]

    # Modify these routes and mount them on a new APIRouter.
    # We need to do this (instead of modifying in place) because we want the
    # later fastapi_app.include_router call to re-run the dependency analysis
    # for each route.
    new_router = APIRouter()
    for route in class_method_routes:
        fastapi_app.routes.remove(route)

        # This block just adds a default value to the self parameter so that
        # FastAPI knows to inject the object when calling the route.
        # Before: def method(self, i): ...
        # After: def method(self=Depends(...), *, i):...
        old_endpoint = route.endpoint
        old_signature = inspect.signature(old_endpoint)
        old_parameters = list(old_signature.parameters.values())
        if len(old_parameters) == 0:
            # TODO(simon): make it more flexible to support no arguments.
            raise RayServeException(
                "Methods in FastAPI class-based view must have ``self`` as "
                "their first argument.")
        old_self_parameter = old_parameters[0]
        new_self_parameter = old_self_parameter.replace(
            default=Depends(get_current_servable_instance))
        new_parameters = [new_self_parameter] + [
            # Make the rest of the parameters keyword only because
            # the first argument is no longer positional.
            parameter.replace(kind=inspect.Parameter.KEYWORD_ONLY)
            for parameter in old_parameters[1:]
        ]
        new_signature = old_signature.replace(parameters=new_parameters)
        setattr(route.endpoint, "__signature__", new_signature)
        setattr(route.endpoint, "_serve_cls", cls)
        new_router.routes.append(route)
    fastapi_app.include_router(new_router)

    routes = fastapi_app.routes
    for route in routes:
        if not isinstance(route, APIRoute):
            continue

        # If there is a response model, FastAPI creates a copy of the fields.
        # But FastAPI creates the field incorrectly by missing the outer_type_.
        if route.response_model:
            original_resp_fields = (
                route.response_field.outer_type_.__fields__)
            cloned_resp_fields = (
                route.secure_cloned_response_field.outer_type_.__fields__)
            for key, field in cloned_resp_fields.items():
                field.outer_type_ = original_resp_fields[key].outer_type_

        # Remove endpoints that belong to other class based views.
        serve_cls = getattr(route.endpoint, "_serve_cls", None)
        if serve_cls is not None and serve_cls != cls:
            routes.remove(route)
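In user code this transformation is reached through @serve.ingress, which applies make_fastapi_class_based_view to the deployment class; a minimal sketch (the Hello class is hypothetical).

from fastapi import FastAPI
from ray import serve

app = FastAPI()

@serve.deployment
@serve.ingress(app)  # applies make_fastapi_class_based_view(app, Hello)
class Hello:
    @app.get("/{name}")
    def greet(self, name: str) -> str:
        return f"Hello, {name}!"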
Example #19
File: api.py Project: yukingx/ray
 def __reduce__(self):
     raise RayServeException(
         ("Ray Serve client cannot be serialized. Please use "
          "serve.connect() to get a client from within a backend."))
Example #20
File: api.py Project: yukingx/ray
def start(detached: bool = False,
          http_host: str = DEFAULT_HTTP_HOST,
          http_port: int = DEFAULT_HTTP_PORT,
          http_middlewares: List[Any] = []) -> Client:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or until the script exits). If detached is set to True, the
    instance will instead persist until client.shutdown() is called, and
    clients can connect to it using serve.connect(). This is only relevant if
    connecting to a long-running Ray cluster (e.g., with address="auto").

    Args:
        detached (bool): Whether or not the instance should be detached from
            this script.
        http_host (str): Host for HTTP servers to listen on. Defaults to
            "127.0.0.1". To expose Serve publicly, you probably want to set
            this to "0.0.0.0". One HTTP server will be started on each node in
            the Ray cluster.
        http_port (int): Port for HTTP server. Defaults to 8000.
        http_middlewares (list): A list of Starlette middlewares that will be
            applied to the HTTP servers in the cluster.
    """
    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init()

    # Try to get serve controller if it exists
    if detached:
        controller_name = SERVE_CONTROLLER_NAME
        try:
            ray.get_actor(controller_name)
            raise RayServeException("Called serve.start(detached=True) but a "
                                    "detached instance is already running. "
                                    "Please use serve.connect() to connect to "
                                    "the running instance instead.")
        except ValueError:
            pass
    else:
        controller_name = format_actor_name(SERVE_CONTROLLER_NAME,
                                            get_random_letters())

    controller = ServeController.options(
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
    ).remote(controller_name,
             http_host,
             http_port,
             http_middlewares,
             detached=detached)

    futures = []
    for node_id in ray.state.node_ids():
        future = block_until_http_ready.options(
            num_cpus=0, resources={
                node_id: 0.01
            }).remote("http://{}:{}/-/routes".format(http_host, http_port),
                      timeout=HTTP_PROXY_TIMEOUT)
        futures.append(future)
    ray.get(futures)

    return Client(controller, controller_name, detached=detached)
Example #21
    async def invoke_batch(self, request_item_list):
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch
        # have the same serve context.

        # For batching kwargs are modified as follows -
        # kwargs [Python Context] : key,val
        # kwargs_list             : key, [val1,val2, ... , valn]
        # or
        # args[Web Context]       : val
        # args_list               : [val1,val2, ...... , valn]
        # where n (current batch size) <= max_batch_size of a backend

        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)
        call_methods = set()

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            call_method = self.get_runner_method(item)
            call_methods.add(call_method)

            if is_web_context:
                # Web context only has one positional argument:
                # the flask request.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # Python context only has kwargs.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a fake flask request to conform
                # with batching semantics: when in batching
                # mode, each argument is turned into a list.
                if self.has_positional_args(call_method):
                    arg_list.append(FakeFlaskRequest())

        try:
            # Check mixing of query context (unified context needed).
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")
            serve_context.web = context_flags.pop()

            if len(call_methods) != 1:
                raise RayServeException(
                    "Queries contain mixed calling methods. Please only send "
                    "the same type of requests in batching mode.")
            call_method = ensure_async(call_methods.pop())

            serve_context.batch_size = batch_size
            # Flask requests are passed to __call__ as a list
            arg_list = [arg_list]

            start_timestamp = time.time()
            result_list = await call_method(*arg_list, **kwargs_list)

            self.latency_list.append(time.time() - start_timestamp)
            if (not isinstance(result_list,
                               list)) or (len(result_list) != batch_size):
                error_message = ("Worker doesn't preserve batch size. The "
                                 "input has length {} but the returned list "
                                 "has length {}. Please return a list of "
                                 "results with length equal to the batch size"
                                 ".".format(batch_size, len(result_list)))
                raise RayServeException(error_message)
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self.error_counter += batch_size
            return [wrapped_exception for _ in range(batch_size)]
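The kwargs-to-kwargs_list transform described in the comment block, shown in isolation (pure Python, no Ray; the batch contents are made up).

from collections import defaultdict

# Per-request kwargs {key: val} become {key: [val1, ..., valn]},
# where n is the current batch size.
batch = [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}, {"x": 3, "y": "c"}]

kwargs_list = defaultdict(list)
for kwargs in batch:
    for k, v in kwargs.items():
        kwargs_list[k].append(v)

print(dict(kwargs_list))  # {'x': [1, 2, 3], 'y': ['a', 'b', 'c']}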
Example #22
    async def invoke_batch(self, request_item_list):
        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)
        call_methods = set()

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            call_method = self.get_runner_method(item)
            call_methods.add(call_method)

            if is_web_context:
                # Web context only has one positional argument:
                # the flask request.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # Python context only has kwargs.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a fake flask request to conform
                # with batching semantics: when in batching
                # mode, each argument is turned into a list.
                if self.has_positional_args(call_method):
                    arg_list.append(FakeFlaskRequest())

        try:
            # Check mixing of query context (unified context needed).
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")
            serve_context.web = context_flags.pop()

            if len(call_methods) != 1:
                raise RayServeException(
                    "Queries contain mixed calling methods. Please only send "
                    "the same type of requests in batching mode.")
            call_method = ensure_async(call_methods.pop())

            serve_context.batch_size = batch_size
            # Flask requests are passed to __call__ as a list
            arg_list = [arg_list]

            self.request_counter.add(batch_size)
            result_list = await call_method(*arg_list, **kwargs_list)

            if not isinstance(result_list, Iterable) or isinstance(
                    result_list, (dict, set)):
                error_message = ("RayServe expects an ordered iterable object "
                                 "but the worker returned a {}".format(
                                     type(result_list)))
                raise RayServeException(error_message)

            # Normalize the result into a list type. This operation is fast
            # in Python because it doesn't copy anything.
            result_list = list(result_list)

            if len(result_list) != batch_size:
                error_message = ("Worker doesn't preserve batch size. The "
                                 "input has length {} but the returned list "
                                 "has length {}. Please return a list of "
                                 "results with length equal to the batch size"
                                 ".".format(batch_size, len(result_list)))
                raise RayServeException(error_message)
            self._reset_context()
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self.error_counter.add()
            self._reset_context()
            return [wrapped_exception for _ in range(batch_size)]
Example #23
    def scale_backend_replicas(
        self,
        backend_tag: BackendTag,
        num_replicas: int,
        force_kill: bool = False,
    ) -> None:
        """Scale the given backend to the number of replicas.

        NOTE: this does not actually start or stop the replicas, but instead
        adds the intention to start/stop them to self.backend_replicas_to_start
        and self.backend_replicas_to_stop. The caller is responsible for then
        first writing a checkpoint and then actually starting/stopping the
        intended replicas. This avoids inconsistencies with starting/stopping a
        replica and then crashing before writing a checkpoint.
        """

        logger.debug("Scaling backend '{}' to {} replicas".format(
            backend_tag, num_replicas))
        assert (backend_tag in self.backends
                ), "Backend {} is not registered.".format(backend_tag)
        assert num_replicas >= 0, ("Number of replicas must be"
                                   " greater than or equal to 0.")

        current_num_replicas = len(self.backend_replicas[backend_tag])
        delta_num_replicas = num_replicas - current_num_replicas

        backend_info: BackendInfo = self.backends[backend_tag]
        if delta_num_replicas > 0:
            can_schedule = try_schedule_resources_on_nodes(requirements=[
                backend_info.replica_config.resource_dict
                for _ in range(delta_num_replicas)
            ])

            if _RESOURCE_CHECK_ENABLED and not all(can_schedule):
                num_possible = sum(can_schedule)
                raise RayServeException(
                    "Cannot scale backend {} to {} replicas. Ray Serve tried "
                    "to add {} replicas but the available resources only "
                    "allow {} to be added. To fix this, consider scaling to "
                    "{} replicas or adding more resources to the cluster. "
                    "You can check available resources with "
                    "ray.nodes().".format(
                        backend_tag, num_replicas, delta_num_replicas,
                        num_possible, current_num_replicas + num_possible))

            logger.debug("Adding {} replicas to backend {}".format(
                delta_num_replicas, backend_tag))
            for _ in range(delta_num_replicas):
                replica_tag = "{}#{}".format(backend_tag, get_random_letters())
                self.backend_replicas_to_start[backend_tag].append(replica_tag)

        elif delta_num_replicas < 0:
            logger.debug("Removing {} replicas from backend '{}'".format(
                -delta_num_replicas, backend_tag))
            assert len(
                self.backend_replicas[backend_tag]) >= -delta_num_replicas
            replicas_copy = self.backend_replicas.copy()
            for _ in range(-delta_num_replicas):
                replica_tag, _ = replicas_copy[backend_tag].popitem()

                graceful_timeout_s = (backend_info.backend_config.
                                      experimental_graceful_shutdown_timeout_s)
                if force_kill:
                    graceful_timeout_s = 0
                self.backend_replicas_to_stop[backend_tag].append((
                    replica_tag,
                    graceful_timeout_s,
                ))
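The bookkeeping above reduces to a signed delta between the target and current replica counts; a stripped-down sketch of building the start/stop intent lists (names and tag format are hypothetical).

import random
import string

def plan_scaling(current_replicas, target_num, backend_tag="backend"):
    """Compute start/stop intent lists without touching any replicas."""
    delta = target_num - len(current_replicas)
    if delta > 0:
        # Scale up: mint <backend_tag>#<random letters> tags to start.
        new_tag = lambda: "{}#{}".format(
            backend_tag,
            "".join(random.choices(string.ascii_lowercase, k=6)))
        return [new_tag() for _ in range(delta)], []
    # Scale down: pick |delta| existing replicas to stop.
    return [], list(current_replicas)[: -delta]

to_start, to_stop = plan_scaling(["backend#aaaaaa"], 3)
print(len(to_start), len(to_stop))  # 2 0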