コード例 #1
0
    async def invoke_single(self, request_item: Query) -> Any:
        """Run one request through the runner method and return the outcome.

        Any ``Exception`` raised while resolving the method, calling it, or
        making the response serializable is converted with
        ``wrap_to_ray_error`` and returned instead of propagating. The
        request or error counter is incremented accordingly, and the elapsed
        time in milliseconds is reported to the latency tracker either way.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        call_args, call_kwargs = parse_request_item(request_item)

        started_at = time.time()
        method_to_call = None
        try:
            runner = self.get_runner_method(request_item)
            method_to_call = sync_to_async(runner)
            raw_output = await method_to_call(*call_args, **call_kwargs)
            result = await self.ensure_serializable_response(raw_output)
            self.request_counter.inc()
        except Exception as exc:
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call is still None if the failure happened before
            # the runner method could be resolved and wrapped.
            function_name = ("unknown" if method_to_call is None else
                             method_to_call.__name__)
            result = wrap_to_ray_error(function_name, exc)
            self.error_counter.inc()

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.observe(elapsed_ms)

        return result
コード例 #2
0
ファイル: replica.py プロジェクト: wuisawesome/ray
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute a single request and return its result or a wrapped error.

        The runner method receives the parsed arguments unless its signature
        declares no parameters at all, in which case it is called with
        nothing. Exceptions raised inside the ``try`` are converted with
        ``wrap_to_ray_error`` and returned rather than propagated. Counters
        and the latency tracker (milliseconds) are updated on every call.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        call_args, call_kwargs = parse_request_item(request_item)

        started_at = time.time()
        method_to_call = None
        try:
            runner_method = self.get_runner_method(request_item)
            method_to_call = sync_to_async(runner_method)
            declared_params = inspect.signature(runner_method).parameters
            if len(declared_params) > 0:
                output = await method_to_call(*call_args, **call_kwargs)
            else:
                # The method doesn't take in anything, including the request
                # information, so nothing is passed into it.
                output = await method_to_call()

            result = await self.ensure_serializable_response(output)
            self.request_counter.inc()
        except Exception as exc:
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            import os

            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call is still None if resolving the method failed.
            function_name = ("unknown" if method_to_call is None else
                             method_to_call.__name__)
            result = wrap_to_ray_error(function_name, exc)
            self.error_counter.inc()

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.observe(elapsed_ms)

        return result
コード例 #3
0
ファイル: backend_worker.py プロジェクト: shuminghu/ray
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute one request and return its result or a wrapped error.

        Positional arguments are dropped when ``has_positional_args`` reports
        that the runner method takes none. The serve context is reset after
        the call whether it succeeded or not, and the latency in
        milliseconds is recorded with backend/replica tags.
        """
        positional, keyword, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context

        runner = self.get_runner_method(request_item)
        if not self.has_positional_args(runner):
            positional = []
        async_runner = ensure_async(runner)

        started_at = time.time()
        try:
            result = await async_runner(*positional, **keyword)
            self.request_counter.record(1, {"backend": self.backend_tag})
        except Exception as exc:
            result = wrap_to_ray_error(exc)
            self.error_counter.record(1, {"backend": self.backend_tag})
        finally:
            self._reset_context()

        elapsed_ms = (time.time() - started_at) * 1000
        latency_tags = {
            "backend": self.backend_tag,
            "replica": self.replica_tag,
            "batch_size": "1"
        }
        self.processing_latency_tracker.record(elapsed_ms, latency_tags)

        return result
コード例 #4
0
ファイル: backend_worker.py プロジェクト: zhangbushi10/ray
    async def invoke_batch(self, request_item_list: List[Query]) -> List[Any]:
        """Execute a batch of requests with a single call to the runner method.

        All requests must resolve to the same runner method. The method is
        called once with the list of parsed request items and must return an
        ordered iterable with exactly one result per request.

        Returns:
            A list of ``batch_size`` results. If anything inside the ``try``
            raises an ``Exception``, every slot holds the same error produced
            by ``wrap_to_ray_error``.
        """
        args = []
        call_methods = set()
        batch_size = len(request_item_list)

        # Construct the batch of requests
        for item in request_item_list:
            logger.debug("Replica {} started executing request {}".format(
                self.replica_tag, item.metadata.request_id))
            args.append(parse_request_item(item))
            call_methods.add(self.get_runner_method(item))

        timing_start = time.time()
        # Stays None until the batch method has been resolved. The error
        # handler below needs a bound name even when the failure (e.g. mixed
        # calling methods or an empty batch) happens before that point.
        call_method = None
        try:
            if len(call_methods) != 1:
                raise RayServeException(
                    f"Queries contain mixed calling methods: {call_methods}. "
                    "Please only send the same type of requests in batching "
                    "mode.")

            self.request_counter.inc(batch_size)

            call_method = sync_to_async(call_methods.pop())
            result_list = await call_method(args)

            if not isinstance(result_list, Iterable) or isinstance(
                    result_list, (dict, set)):
                error_message = ("RayServe expects an ordered iterable object "
                                 "but the replica returned a {}".format(
                                     type(result_list)))
                raise RayServeException(error_message)

            # Normalize the result into a list so its length can be checked
            # and its items replaced in place below. This is a shallow copy:
            # the result objects themselves are not duplicated.
            result_list = list(result_list)

            if (len(result_list) != batch_size):
                error_message = ("Worker doesn't preserve batch size. The "
                                 "input has length {} but the returned list "
                                 "has length {}. Please return a list of "
                                 "results with length equal to the batch size"
                                 ".".format(batch_size, len(result_list)))
                raise RayServeException(error_message)
            for i, result in enumerate(result_list):
                result_list[i] = (await
                                  self.ensure_serializable_response(result))
        except Exception as e:
            function_name = "unknown"
            if call_method is not None:
                function_name = call_method.__name__
            wrapped_exception = wrap_to_ray_error(function_name, e)
            self.error_counter.inc()
            result_list = [wrapped_exception for _ in range(batch_size)]

        latency_ms = (time.time() - timing_start) * 1000
        self.processing_latency_tracker.observe(
            latency_ms, tags={"batch_size": str(batch_size)})

        return result_list
コード例 #5
0
    async def invoke_batch(self, request_item_list: List[Query]) -> List[Any]:
        """Execute a batch of requests with one call to the runner method.

        Every request must resolve to the same runner method, and that method
        must return an ordered iterable with one entry per request. On any
        ``Exception`` inside the ``try``, each slot of the returned list
        holds the same error produced by ``wrap_to_ray_error``. The latency
        in milliseconds is recorded with backend, replica and batch-size
        tags.
        """
        parsed_items = []
        call_methods = set()
        batch_size = len(request_item_list)

        # Collect the parsed payloads and the distinct methods they target.
        for item in request_item_list:
            parsed_items.append(parse_request_item(item))
            call_methods.add(self.get_runner_method(item))

        timing_start = time.time()
        try:
            if len(call_methods) != 1:
                raise RayServeException(
                    f"Queries contain mixed calling methods: {call_methods}. "
                    "Please only send the same type of requests in batching "
                    "mode.")

            self.request_counter.record(batch_size,
                                        {"backend": self.backend_tag})

            batch_method = ensure_async(call_methods.pop())
            result_list = await batch_method(parsed_items)

            # dict and set are iterable but not accepted as ordered results.
            is_ordered_iterable = (
                isinstance(result_list, Iterable)
                and not isinstance(result_list, (dict, set)))
            if not is_ordered_iterable:
                raise RayServeException(
                    "RayServe expects an ordered iterable object "
                    "but the worker returned a {}".format(type(result_list)))

            # Normalize the result into a list so its length can be checked.
            result_list = list(result_list)

            returned_size = len(result_list)
            if returned_size != batch_size:
                raise RayServeException(
                    "Worker doesn't preserve batch size. The "
                    "input has length {} but the returned list "
                    "has length {}. Please return a list of "
                    "results with length equal to the batch size"
                    ".".format(batch_size, len(result_list)))
        except Exception as exc:
            failure = wrap_to_ray_error(exc)
            self.error_counter.record(1, {"backend": self.backend_tag})
            # Every request in the batch receives the same wrapped error.
            result_list = [failure] * batch_size

        elapsed_ms = (time.time() - timing_start) * 1000
        latency_tags = {
            "backend": self.backend_tag,
            "replica_tag": self.replica_tag,
            "batch_size": str(batch_size)
        }
        self.processing_latency_tracker.record(elapsed_ms, latency_tags)

        return result_list
コード例 #6
0
    def invoke_single(self, request_item):
        """Call this object with one request and return the outcome.

        An ``Exception`` raised by ``__call__`` is converted with
        ``wrap_to_ray_error`` and returned, and the error counter is bumped.
        The elapsed time in seconds is appended to the latency list whether
        the call succeeded or failed.
        """
        positional, keyword, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context
        began = time.time()

        try:
            outcome = self.__call__(*positional, **keyword)
        except Exception as exc:
            outcome = wrap_to_ray_error(exc)
            self._serve_metric_error_counter += 1

        elapsed_s = time.time() - began
        self._serve_metric_latency_list.append(elapsed_s)
        return outcome
コード例 #7
0
ファイル: backend_worker.py プロジェクト: whitezou/ray
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute one request and return its result or a wrapped error.

        When the config marks the backend as an ASGI app, the parsed request
        is forwarded to ``self.callable._serve_asgi_app`` after its scope
        path has been adjusted for the configured path prefix. Otherwise the
        runner method is called with the parsed argument. Exceptions inside
        the ``try`` are converted with ``wrap_to_ray_error`` and returned.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        arg = parse_request_item(request_item)

        started_at = time.time()
        method_to_call = None
        try:
            # TODO(simon): Split this section out when invoke_batch is removed.
            if not self.config.internal_metadata.is_asgi_app:
                runner = self.get_runner_method(request_item)
                method_to_call = sync_to_async(runner)
                result = await method_to_call(arg)
            else:
                request: Request = arg
                prefix = self.config.internal_metadata.path_prefix

                # The incoming scope["path"] still carries the prefix and
                # FastAPI won't strip it, so remove its first occurrence.
                stripped_path = request.scope["path"].replace(prefix, "", 1)
                request.scope["path"] = stripped_path
                # root_path is set so that reverse lookup and redirection
                # work.
                request.scope["root_path"] = prefix

                sender = ASGIHTTPSender()
                await self.callable._serve_asgi_app(
                    request.scope,
                    request._receive,
                    sender,
                )
                result = sender.build_starlette_response()
            result = await self.ensure_serializable_response(result)
            self.request_counter.inc()
        except Exception as exc:
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call stays None on the ASGI path and when resolving
            # the runner method failed.
            function_name = ("unknown" if method_to_call is None else
                             method_to_call.__name__)
            result = wrap_to_ray_error(function_name, exc)
            self.error_counter.inc()

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.observe(elapsed_ms,
                                                tags={"batch_size": "1"})

        return result
コード例 #8
0
ファイル: replica.py プロジェクト: parasj/ray
    async def invoke_single(self, request_item: Query) -> Tuple[Any, bool]:
        """Execute the given request on this replica.

        Returns:
            A ``(result, success)`` pair. ``success`` is False when an
            ``Exception`` was raised inside the ``try``; in that case
            ``result`` is the error produced by ``wrap_to_ray_error``.
        """
        logger.debug(
            "Replica {} started executing request {}".format(
                self.replica_tag, request_item.metadata.request_id
            )
        )
        args, kwargs = parse_request_item(request_item)

        method_to_call = None
        success = True
        try:
            runner_method = self.get_runner_method(request_item)
            method_to_call = sync_to_async(runner_method)
            declared_params = inspect.signature(runner_method).parameters
            # Over HTTP with no user args the parsed args are a single
            # starlette Request; from Python with no args they are ().
            # Only when the method declares no parameters and the sole
            # argument is that Request is the method called with nothing.
            call_without_args = (
                len(declared_params) == 0
                and len(args) == 1
                and isinstance(args[0], starlette.requests.Request)
            )
            if call_without_args:
                output = await method_to_call()
            else:
                # For a parameterless method this raises on a signature
                # mismatch if the caller supplied non-empty args.
                output = await method_to_call(*args, **kwargs)

            result = await self.ensure_serializable_response(output)
            self.request_counter.inc()
        except Exception as e:
            logger.exception(f"Request failed due to {type(e).__name__}:")
            success = False
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call is still None if resolving the method failed.
            function_name = (
                "unknown" if method_to_call is None
                else method_to_call.__name__
            )
            result = wrap_to_ray_error(function_name, e)
            self.error_counter.inc()

        return result, success
コード例 #9
0
ファイル: backend_worker.py プロジェクト: roalexan/ray
    async def invoke_single(self, request_item):
        """Execute one request and return its result or a wrapped error.

        Positional arguments are dropped when ``has_positional_args`` reports
        that the runner method takes none. The request counter is bumped on
        success and the error counter on failure.
        """
        positional, keyword, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context

        runner = self.get_runner_method(request_item)
        if not self.has_positional_args(runner):
            positional = []
        async_runner = ensure_async(runner)
        try:
            outcome = await async_runner(*positional, **keyword)
            self.request_counter.add()
        except Exception as exc:
            outcome = wrap_to_ray_error(exc)
            self.error_counter.add()

        return outcome
コード例 #10
0
    async def invoke_single(self, request_item: Query) -> Any:
        """Call the runner method with the parsed request and return the outcome.

        An ``Exception`` raised by the call is converted with
        ``wrap_to_ray_error`` and returned. The latency in milliseconds is
        recorded with a batch-size tag of "1" in both cases.
        """
        runner = self.get_runner_method(request_item)
        async_runner = ensure_async(runner)
        payload = parse_request_item(request_item)

        started_at = time.time()
        try:
            outcome = await async_runner(payload)
            self.request_counter.record(1)
        except Exception as exc:
            outcome = wrap_to_ray_error(exc)
            self.error_counter.record(1)

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.record(
            elapsed_ms, tags={"batch_size": "1"})

        return outcome
コード例 #11
0
    async def invoke_single(self, request_item):
        """Execute one request and return its result or a wrapped error.

        Positional arguments are dropped when ``has_positional_args`` reports
        that the runner method takes none. The elapsed time in seconds,
        measured from just after parsing, is appended to the latency list
        whether the call succeeded or failed.
        """
        positional, keyword, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context
        began = time.time()

        runner = self.get_runner_method(request_item)
        if not self.has_positional_args(runner):
            positional = []
        async_runner = ensure_async(runner)
        try:
            outcome = await async_runner(*positional, **keyword)
        except Exception as exc:
            outcome = wrap_to_ray_error(exc)
            self.error_counter += 1

        elapsed_s = time.time() - began
        self.latency_list.append(elapsed_s)
        return outcome
コード例 #12
0
ファイル: backend_worker.py プロジェクト: yonkshi/ray
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute one request and return its result or a wrapped error.

        Positional arguments are dropped when ``has_positional_args`` reports
        that the runner method takes none. The serve context is reset after
        the call whether it succeeded or not.
        """
        positional, keyword, is_web_context = parse_request_item(request_item)
        serve_context.web = is_web_context

        runner = self.get_runner_method(request_item)
        if not self.has_positional_args(runner):
            positional = []
        async_runner = ensure_async(runner)
        try:
            outcome = await async_runner(*positional, **keyword)
            self.request_counter.record(1, {"backend": self.backend_tag})
        except Exception as exc:
            outcome = wrap_to_ray_error(exc)
            self.error_counter.record(1, {"backend": self.backend_tag})
        finally:
            self._reset_context()

        return outcome
コード例 #13
0
    async def invoke_single(self, request_item: Query) -> Any:
        """Call the runner method with the parsed request and return the outcome.

        An ``Exception`` raised by the call is converted with
        ``wrap_to_ray_error`` and returned. The latency in milliseconds is
        recorded with a batch-size tag of "1" in both cases.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        runner = self.get_runner_method(request_item)
        async_runner = ensure_async(runner)
        payload = parse_request_item(request_item)

        started_at = time.time()
        try:
            outcome = await async_runner(payload)
            self.request_counter.record(1)
        except Exception as exc:
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            outcome = wrap_to_ray_error(exc)
            self.error_counter.record(1)

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.record(
            elapsed_ms, tags={"batch_size": "1"})

        return outcome
コード例 #14
0
ファイル: backend_worker.py プロジェクト: yyz940922/ray
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute one request and return its result or a wrapped error.

        When the config marks the backend as an ASGI app, the first parsed
        positional argument is treated as the request and forwarded to
        ``self.callable._serve_asgi_app``. Otherwise the runner method is
        called with the parsed arguments. Exceptions inside the ``try`` are
        converted with ``wrap_to_ray_error`` and returned.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        call_args, call_kwargs = parse_request_item(request_item)

        started_at = time.time()
        method_to_call = None
        try:
            # TODO(simon): Split this section out when invoke_batch is removed.
            if not self.config.internal_metadata.is_asgi_app:
                runner = self.get_runner_method(request_item)
                method_to_call = sync_to_async(runner)
                result = await method_to_call(*call_args, **call_kwargs)
            else:
                request: Request = call_args[0]
                sender = ASGIHTTPSender()
                await self.callable._serve_asgi_app(
                    request.scope,
                    request._receive,
                    sender,
                )
                result = sender.build_starlette_response()
            result = await self.ensure_serializable_response(result)
            self.request_counter.inc()
        except Exception as exc:
            # Post-mortem debugging is opt-in through the RAY_PDB env var.
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call stays None on the ASGI path and when resolving
            # the runner method failed.
            function_name = ("unknown" if method_to_call is None else
                             method_to_call.__name__)
            result = wrap_to_ray_error(function_name, exc)
            self.error_counter.inc()

        elapsed_ms = (time.time() - started_at) * 1000
        self.processing_latency_tracker.observe(elapsed_ms)

        return result
コード例 #15
0
    def invoke_batch(self, request_item_list):
        """Call this object once for a whole batch of requests.

        Arguments are batched as follows, where n is the batch size:
          * web context: the first positional argument of each request is
            collected into one list of n requests;
          * otherwise: each kwarg ``key=val`` becomes ``key=[val1, ..., valn]``
            and a ``FakeFlaskRequest`` placeholder is collected per request.
        The collected list is passed to ``__call__`` as its single positional
        argument.

        Returns:
            The list returned by ``__call__`` when it is a list of length n;
            otherwise a list holding the same wrapped error n times.
        """
        # TODO(alind): create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch have the same serve
        # context. This is verified inside the try block below.
        batched_requests = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            if not is_web_context:
                # Non-web context: gather each kwarg value under its key and
                # add a placeholder so the request list has one entry per
                # item.
                for key, value in kwargs.items():
                    kwargs_list[key].append(value)
                batched_requests.append(FakeFlaskRequest())
                continue

            # Web context: the request is the first positional argument.
            batched_requests.append(args[0])

        try:
            # A batch must not mix web and non-web requests.
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")

            serve_context.web = context_flags.pop()
            serve_context.batch_size = batch_size

            start_timestamp = time.time()
            # The requests are handed to __call__ as one list argument.
            result_list = self.__call__(batched_requests, **kwargs_list)
            elapsed_s = time.time() - start_timestamp
            self._serve_metric_latency_list.append(elapsed_s)

            size_preserved = (isinstance(result_list, list)
                              and len(result_list) == batch_size)
            if not size_preserved:
                raise RayServeException("__call__ function "
                                        "doesn't preserve batch-size. "
                                        "Please return a list of result "
                                        "with length equals to the batch "
                                        "size.")
            return result_list
        except Exception as exc:
            failure = wrap_to_ray_error(exc)
            self._serve_metric_error_counter += batch_size
            # Every request in the batch receives the same wrapped error.
            return [failure] * batch_size
コード例 #16
0
ファイル: backend_worker.py プロジェクト: mmilk1231/ray
    async def invoke_batch(self, request_item_list):
        """Execute a batch of requests with one call to the runner method.

        All requests must share the same context (web or not) and resolve to
        the same runner method. The method must return an ordered iterable
        with one entry per request.

        Returns:
            A list of ``batch_size`` results. If anything inside the ``try``
            raises an ``Exception``, every slot holds the same error produced
            by ``wrap_to_ray_error``.
        """
        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)
        call_methods = set()

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            call_method = self.get_runner_method(item)
            call_methods.add(call_method)

            if is_web_context:
                # Web context: the request is the first positional argument.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # Non-web context: gather each kwarg value under its key.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a placeholder request to conform with batching
                # semantics (each argument is turned into a list), but only
                # when the method accepts positional arguments.
                if self.has_positional_args(call_method):
                    arg_list.append(FakeFlaskRequest())

        try:
            # Check mixing of query context (unified context needed).
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")
            serve_context.web = context_flags.pop()

            if len(call_methods) != 1:
                raise RayServeException(
                    "Queries contain mixed calling methods. Please only send "
                    "the same type of requests in batching mode.")
            call_method = ensure_async(call_methods.pop())

            serve_context.batch_size = batch_size
            # Flask requests are passed to __call__ as a list
            arg_list = [arg_list]

            self.request_counter.add(batch_size)
            result_list = await call_method(*arg_list, **kwargs_list)

            if not isinstance(result_list, Iterable) or isinstance(
                    result_list, (dict, set)):
                error_message = ("RayServe expects an ordered iterable object "
                                 "but the worker returned a {}".format(
                                     type(result_list)))
                raise RayServeException(error_message)

            # Normalize the result into a list so its length can be checked.
            # This is a shallow copy: the result objects are not duplicated.
            result_list = list(result_list)

            if (len(result_list) != batch_size):
                error_message = ("Worker doesn't preserve batch size. The "
                                 "input has length {} but the returned list "
                                 "has length {}. Please return a list of "
                                 "results with length equal to the batch size"
                                 ".".format(batch_size, len(result_list)))
                raise RayServeException(error_message)
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self.error_counter.add()
            return [wrapped_exception for _ in range(batch_size)]
        finally:
            # Reset the context on every exit path, including ones the
            # handler above does not cover (e.g. a BaseException such as
            # task cancellation, or a failure inside the handler itself).
            self._reset_context()
コード例 #17
0
    async def invoke_batch(self, request_item_list):
        """Execute a batch of requests with one call to the runner method.

        Arguments are batched as follows, where n is the batch size:
          * web context: the first positional argument of each request is
            collected into one list of n requests;
          * otherwise: each kwarg ``key=val`` becomes ``key=[val1, ..., valn]``.
        All requests must share the same context and the same runner method,
        and the method must return a list of length n.

        Returns:
            The list returned by the runner method, or, if anything inside
            the ``try`` raises an ``Exception``, a list holding the same
            wrapped error n times.
        """
        # TODO(alind): create no-http services. The enqueues
        # from such services will always be TaskContext.Python.

        # Assumption: all the requests in a batch have the same serve
        # context. This is verified inside the try block below.

        arg_list = []
        kwargs_list = defaultdict(list)
        context_flags = set()
        batch_size = len(request_item_list)
        call_methods = set()

        for item in request_item_list:
            args, kwargs, is_web_context = parse_request_item(item)
            context_flags.add(is_web_context)

            call_method = self.get_runner_method(item)
            call_methods.add(call_method)

            if is_web_context:
                # Web context: the request is the first positional argument.
                flask_request = args[0]
                arg_list.append(flask_request)
            else:
                # Non-web context: gather each kwarg value under its key.
                for k, v in kwargs.items():
                    kwargs_list[k].append(v)

                # Add a placeholder request to conform with batching
                # semantics (each argument is turned into a list), but only
                # when the method accepts positional arguments.
                if self.has_positional_args(call_method):
                    arg_list.append(FakeFlaskRequest())

        try:
            # Check mixing of query context (unified context needed).
            if len(context_flags) != 1:
                raise RayServeException(
                    "Batched queries contain mixed context. Please only send "
                    "the same type of requests in batching mode.")
            serve_context.web = context_flags.pop()

            if len(call_methods) != 1:
                raise RayServeException(
                    "Queries contain mixed calling methods. Please only send "
                    "the same type of requests in batching mode.")
            call_method = ensure_async(call_methods.pop())

            serve_context.batch_size = batch_size
            # Flask requests are passed to __call__ as a list
            arg_list = [arg_list]

            start_timestamp = time.time()
            result_list = await call_method(*arg_list, **kwargs_list)

            self.latency_list.append(time.time() - start_timestamp)
            # Check the type before touching len(): a non-sized return value
            # such as None would otherwise raise a TypeError while the
            # batch-size message is being formatted, hiding the real cause.
            if not isinstance(result_list, list):
                error_message = ("RayServe expects the worker to return a "
                                 "list but it returned a {}".format(
                                     type(result_list)))
                raise RayServeException(error_message)
            if len(result_list) != batch_size:
                error_message = ("Worker doesn't preserve batch size. The "
                                 "input has length {} but the returned list "
                                 "has length {}. Please return a list of "
                                 "results with length equal to the batch size"
                                 ".".format(batch_size, len(result_list)))
                raise RayServeException(error_message)
            return result_list
        except Exception as e:
            wrapped_exception = wrap_to_ray_error(e)
            self.error_counter += batch_size
            return [wrapped_exception for _ in range(batch_size)]