예제 #1
0
    def remote(self, *args, **kwargs):
        """Enqueue a request for this endpoint and return its result ObjectID.

        Args:
            *args: Not supported; any positional argument is rejected.
            **kwargs: Keyword arguments forwarded to the router as the
                request kwargs. The optional ``slo_ms`` entry is removed
                from them and sent separately as ``request_slo_ms``.

        Returns:
            A ``ray.ObjectID`` built from the bytes returned by the router's
            ``enqueue_request`` call.

        Raises:
            RayServeException: if positional arguments are given, if
                ``slo_ms`` cannot be converted to a float, or if it is
                negative.
        """
        if len(args) != 0:
            raise RayServeException(
                "handle.remote must be invoked with keyword arguments.")

        # get slo_ms before enqueuing the query
        request_slo_ms = kwargs.pop("slo_ms", None)
        if request_slo_ms is not None:
            try:
                request_slo_ms = float(request_slo_ms)
            except (TypeError, ValueError) as e:
                # float() raises ValueError for unparsable strings and
                # TypeError for non-numeric objects (lists, dicts, ...);
                # both mean the caller supplied an invalid SLO.
                raise RayServeException(str(e)) from e
            if request_slo_ms < 0:
                raise RayServeException(
                    "Request SLO must be positive, it is {}".format(
                        request_slo_ms))

        result_object_id_bytes = ray.get(
            self.router_handle.enqueue_request.remote(
                service=self.endpoint_name,
                request_args=(),
                request_kwargs=kwargs,
                request_context=TaskContext.Python,
                request_slo_ms=request_slo_ms))
        return ray.ObjectID(result_object_id_bytes)
예제 #2
0
    def remote(self, *args, **kwargs):
        """Enqueue a keyword-only request and return its result ObjectID.

        Positional arguments are rejected with a RayServeException. The
        keyword arguments are handed to the router's ``enqueue_request``
        under the Python task context, and the bytes it returns are wrapped
        in a ``ray.ObjectID``.
        """
        if args:
            raise RayServeException(
                "handle.remote must be invoked with keyword arguments.")

        pending_enqueue = self.router_handle.enqueue_request.remote(
            service=self.endpoint_name,
            request_args=(),
            request_kwargs=kwargs,
            request_context=TaskContext.Python)
        object_id_bytes = ray.get(pending_enqueue)
        return ray.ObjectID(object_id_bytes)
예제 #3
0
파일: handle.py 프로젝트: skyofwinter/ray
    def remote(self, *args, **kwargs):
        """Enqueue a keyword-only request on the router.

        Positional arguments are rejected with a RayServeException. A
        RequestMetadata is built from this handle's endpoint name, the
        Python task context and the handle's relative/absolute SLO values,
        then passed to ``enqueue_request`` together with the caller's
        keyword arguments. Whatever that remote call returns is returned
        unchanged.
        """
        if args:
            raise RayServeException(
                "handle.remote must be invoked with keyword arguments.")

        # Metadata travels with the request as its first positional argument.
        metadata = RequestMetadata(
            self.endpoint_name,
            TaskContext.Python,
            self.relative_slo_ms,
            self.absolute_slo_ms,
        )
        enqueue = self.router_handle.enqueue_request
        return enqueue.remote(metadata, **kwargs)
예제 #4
0
파일: handle.py 프로젝트: skyofwinter/ray
 def _check_slo_ms(self, slo_value):
     if slo_value is not None:
         try:
             slo_value = float(slo_value)
             if slo_value < 0:
                 raise ValueError(
                     "Request SLO must be positive, it is {}".format(
                         slo_value))
             return slo_value
         except ValueError as e:
             raise RayServeException(str(e))
     return None
예제 #5
0
 def __setattr__(self, name, value):
     """Reject every attribute assignment on this object.

     ``name`` and ``value`` are ignored; the method unconditionally raises
     RayServeException with the ``_not_in_web_context_error`` message that
     is defined outside this method.
     """
     # NOTE(review): judging by the message name, this object presumably
     # stands in for a web request outside of a web context - confirm
     # against the enclosing class.
     raise RayServeException(_not_in_web_context_error)
예제 #6
0
 def __getattribute__(self, name):
     """Reject every attribute read on this object.

     ``name`` is ignored; the method unconditionally raises
     RayServeException with the ``_not_in_web_context_error`` message that
     is defined outside this method.
     """
     # NOTE(review): judging by the message name, this object presumably
     # stands in for a web request outside of a web context - confirm
     # against the enclosing class.
     raise RayServeException(_not_in_web_context_error)
예제 #7
0
파일: api.py 프로젝트: pangfd/ray-1
 def check(*args, **kwargs):
     """Call the wrapped ``f`` only when the serve global state exists.

     Raises RayServeException when ``_get_global_state()`` returns None;
     otherwise forwards all arguments to ``f`` and returns its result.
     """
     if _get_global_state() is not None:
         return f(*args, **kwargs)
     raise RayServeException("Please run serve.init to initialize or "
                             "connect to existing ray serve cluster.")
예제 #8
0
파일: api.py 프로젝트: skyofwinter/ray
def create_backend(func_or_class,
                   backend_tag,
                   *actor_init_args,
                   backend_config=BackendConfig()):
    """Create a backend from func_or_class and register it as backend_tag.

    Args:
        func_or_class (callable, class): a function, or a class that
            implements the __call__ protocol.
        backend_tag (str): a unique tag assigned to this backend. It will be
            used to associate services in traffic policy.
        *actor_init_args (optional): the arguments to pass to the class
            initialization method. They are only stored when func_or_class
            is a class; for a function the function itself is stored.
        backend_config (BackendConfig): an object defining backend
            properties for starting a backend.

    Raises:
        RayServeException: if ``backend_config.max_batch_size`` is set but
            the function (or the class's ``__call__``) does not carry the
            ``serve_accept_batch`` attribute.
        TypeError: if func_or_class is neither a function nor a class.
    """
    assert isinstance(backend_config, BackendConfig), (
        "backend_config must be of instance BackendConfig")
    config_as_dict = dict(backend_config)

    batching_requested = backend_config.max_batch_size is not None
    missing_batch_annotation = RayServeException(
        "max_batch_size is set in config but the function or method does not "
        "accept batching. Please use @serve.accept_batch to explicitly mark "
        "the function or method as batchable and takes in list as arguments.")

    is_plain_function = inspect.isfunction(func_or_class)
    if not is_plain_function and not inspect.isclass(func_or_class):
        raise TypeError(
            "Backend must be a function or class, it is {}.".format(
                type(func_or_class)))

    if is_plain_function:
        if batching_requested and not hasattr(func_or_class,
                                              "serve_accept_batch"):
            raise missing_batch_annotation

        # A function backend is started with the function as its only
        # initial argument.
        init_args = [func_or_class]
        # ignore lint on lambda expression
        creator = lambda kwrgs: TaskRunnerActor._remote(**kwrgs)  # noqa: E731
    else:
        if batching_requested and not hasattr(func_or_class.__call__,
                                              "serve_accept_batch"):
            raise missing_batch_annotation

        # RayServeMixin is listed first so that its methods come first in
        # the method resolution order and are not overridden by the user
        # class.
        @ray.remote
        class CustomActor(RayServeMixin, func_or_class):
            pass

        init_args = actor_init_args
        # ignore lint on lambda expression
        creator = lambda kwargs: CustomActor._remote(**kwargs)  # noqa: E731

    # Record, in order: the replica creator, the configuration needed to
    # start replicas, and the initial arguments replicas are started with.
    global_state.backend_table.register_backend(backend_tag, creator)
    global_state.backend_table.register_info(backend_tag, config_as_dict)
    global_state.backend_table.save_init_args(backend_tag, init_args)

    # Push the backend config to the router (particularly for
    # max-batch-size) and wait for it before scaling.
    router = global_state.init_or_get_router()
    ray.get(router.set_backend_config.remote(backend_tag, config_as_dict))
    scale(backend_tag, config_as_dict["num_replicas"])
예제 #9
0
    def invoke_batch(self, request_item_list):
        """Run ``__call__`` once over a whole batch of request items.

        Each item is unpacked with ``parse_request_item``. The per-request
        values are collected into lists so that ``__call__`` receives one
        list of (flask or fake flask) requests as its positional argument
        and, for Python-context requests, one list per keyword argument.

        Args:
            request_item_list: the request items that make up the batch.

        Returns:
            list: the list returned by ``__call__`` when it is a list whose
            length equals the batch size. On any failure (mixed contexts,
            an exception in ``__call__``, a result that is not a list of
            the right length) a list holding the wrapped exception once per
            request is returned instead and the error counter is increased
            by the batch size.
        """
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption : all the requests in a batch
        # have same serve context.

        # For batching kwargs are modified as follows -
        # kwargs [Python Context] : key,val
        # kwargs_list             : key, [val1,val2, ... , valn]
        # or
        # args[Web Context]       : val
        # args_list               : [val1,val2, ...... , valn]
        # where n (current batch size) <= max_batch_size of a backend

        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            if is_web_context:
                # Web context only has one positional argument:
                # the flask request.
                arg_list.append(args[0])
            else:
                # Python context only has kwargs
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a placeholder request so the request list keeps one
                # entry per batched query, conforming with batching
                # semantics: when in batching mode, each argument is
                # turned into a list.
                arg_list.append(FakeFlaskRequest())

        try:
            # check mixing of query context
            # unified context needed
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")

            serve_context.web = context_flags.pop()
            serve_context.batch_size = batch_size
            start_timestamp = time.time()

            # Pass the collected list of requests as the single positional
            # argument. Unpacking the loop variable ``args`` here would
            # only forward the last request's arguments and drop the rest
            # of the batch.
            result_list = self.__call__(arg_list, **kwargs_list)

            self._serve_metric_latency_list.append(time.time() -
                                                   start_timestamp)
            if (not isinstance(result_list,
                               list)) or (len(result_list) != batch_size):
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch-size. "
                                        "Please return a list of result "
                                        "with length equals to the batch "
                                        "size.")
            return result_list
        except Exception as e:
            # Every request in the batch gets the same wrapped error so the
            # returned list still has one entry per request.
            wrapped_exception = wrap_to_ray_error(e)
            self._serve_metric_error_counter += batch_size
            return [wrapped_exception for _ in range(batch_size)]
예제 #10
0
    def invoke_batch(self, request_item_list):
        """Run ``__call__`` once over a batch and store each result.

        Every request item is unpacked with ``parse_request_item`` into its
        args, kwargs, web-context flag and result object id. Web requests
        contribute their single positional argument to one list; Python
        requests contribute each keyword value to a per-key list. After the
        call, each result is put under the matching result object id. On
        any failure the wrapped exception is put under every result object
        id instead and the error counter grows by the number of requests.
        Nothing is returned.
        """
        # TODO(alind) : create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption : all the requests in a batch
        # have same serve context.

        # Batched shapes (n = current batch size <= max_batch_size):
        #   Python context : kwargs key -> [val1, val2, ..., valn]
        #   Web context    : single positional arg -> [val1, ..., valn]

        batched_kwargs = defaultdict(list)
        web_flags = []
        web_requests = []
        result_object_ids = []
        num_requests = len(request_item_list)

        for request_item in request_item_list:
            item_args, item_kwargs, from_web, object_id = (
                parse_request_item(request_item))
            web_flags.append(from_web)

            if not from_web:
                # Python context only has kwargs
                for key, value in item_kwargs.items():
                    batched_kwargs[key].append(value)
            else:
                # Web context only has one positional argument
                web_requests.append(item_args[0])
            result_object_ids.append(object_id)

        try:
            # All requests of a batch must share one context.
            if len(set(web_flags)) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context.")

            serve_context.web = all(web_flags)
            if serve_context.web:
                request_batch = web_requests
            else:
                # Python requests carry no flask request, so hand over one
                # placeholder per query to keep the argument a list of
                # batch length.
                request_batch = [
                    FakeFlaskRequest() for _ in range(num_requests)
                ]

            # set the current batch size (n) for serve_context
            expected_count = len(result_object_ids)
            serve_context.batch_size = expected_count

            started_at = time.time()
            results = self.__call__(request_batch, **batched_kwargs)

            size_preserved = (isinstance(results, list)
                              and len(results) == expected_count)
            if not size_preserved:
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch-size. "
                                        "Please return a list of result "
                                        "with length equals to the batch "
                                        "size.")

            for object_id, result in zip(result_object_ids, results):
                ray.worker.global_worker.put_object(result, object_id)

            # Latency is recorded only after all results are stored.
            elapsed = time.time() - started_at
            self._serve_metric_latency_list.append(elapsed)
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self._serve_metric_error_counter += len(result_object_ids)
            for object_id in result_object_ids:
                ray.worker.global_worker.put_object(wrapped_exception,
                                                    object_id)