Example #1
0
    async def ensure_serializable_response(self, response: Any) -> Any:
        """Materialize a Starlette streaming response; pass anything else through.

        When ``response`` is a ``starlette.responses.StreamingResponse`` it is
        invoked as an ASGI callable with an ``ASGIHTTPSender`` as the ``send``
        channel, and the value of ``build_starlette_response()`` on that
        sender is returned. Every other value is returned unchanged.
        """
        if not isinstance(response, starlette.responses.StreamingResponse):
            return response

        async def block_forever():
            # The response polls ``receive`` in a tight loop purely to detect
            # an http disconnect. Waiting on an event that nobody ever sets
            # parks this coroutine instead of spinning and burning CPU.
            await asyncio.Event().wait()

        # NOTE(review): scope is passed as None -- presumably the response's
        # __call__ does not read it; confirm against the Starlette version
        # in use.
        collector = ASGIHTTPSender()
        await response(scope=None, receive=block_forever, send=collector)
        return collector.build_starlette_response()
Example #2
0
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute a single request on this replica and return its result.

        If the replica is configured as an ASGI app, the request's scope is
        rewritten (leading path prefix removed from ``path``, prefix stored
        in ``root_path``) and the app is served with an ``ASGIHTTPSender``
        that captures the response. Otherwise the runner method selected by
        ``get_runner_method`` is awaited with the parsed argument. In both
        cases the result is passed through ``ensure_serializable_response``.

        Exceptions raised while handling the request are not propagated:
        the error counter is incremented and the value of
        ``wrap_to_ray_error(function_name, e)`` is returned instead. When the
        ``RAY_PDB`` environment variable is present, a post-mortem debugger
        is started first.

        Args:
            request_item: The query to execute.

        Returns:
            The processed result on success, or the wrapped error on failure.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        arg = parse_request_item(request_item)

        # perf_counter() is monotonic, so the measured latency cannot be
        # skewed (or go negative) when the system wall clock is adjusted.
        start = time.perf_counter()
        method_to_call = None
        try:
            # TODO(simon): Split this section out when invoke_batch is removed.
            if self.config.internal_metadata.is_asgi_app:
                request: Request = arg
                scope = request.scope
                root_path = self.config.internal_metadata.path_prefix

                # The incoming scope["path"] contains prefixed path and it
                # won't be stripped by FastAPI. Only strip a *leading*
                # prefix: a plain str.replace would also remove the first
                # occurrence found in the middle of the path.
                path = scope["path"]
                if path.startswith(root_path):
                    path = path[len(root_path):]
                request.scope["path"] = path
                # root_path is used such that the reverse look up and
                # redirection works.
                request.scope["root_path"] = root_path

                sender = ASGIHTTPSender()
                await self.callable._serve_asgi_app(
                    request.scope,
                    request._receive,
                    sender,
                )
                result = sender.build_starlette_response()
            else:
                method_to_call = sync_to_async(
                    self.get_runner_method(request_item))
                result = await method_to_call(arg)
            result = await self.ensure_serializable_response(result)
            self.request_counter.inc()
        except Exception as e:
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call stays None on the ASGI path or when the failure
            # happened before the runner method was resolved.
            function_name = "unknown"
            if method_to_call is not None:
                function_name = method_to_call.__name__
            result = wrap_to_ray_error(function_name, e)
            self.error_counter.inc()

        # Latency is recorded for both successful and failed requests.
        latency_ms = (time.perf_counter() - start) * 1000
        self.processing_latency_tracker.observe(latency_ms,
                                                tags={"batch_size": "1"})

        return result
Example #3
0
    async def invoke_single(self, request_item: Query) -> Any:
        """Execute a single request on this replica and return its result.

        If the replica is configured as an ASGI app, the first parsed
        positional argument is treated as the request and the app is served
        with an ``ASGIHTTPSender`` that captures the response. Otherwise the
        runner method selected by ``get_runner_method`` is awaited with the
        parsed positional and keyword arguments. In both cases the result is
        passed through ``ensure_serializable_response``.

        Exceptions raised while handling the request are not propagated:
        the error counter is incremented and the value of
        ``wrap_to_ray_error(function_name, e)`` is returned instead. When the
        ``RAY_PDB`` environment variable is present, a post-mortem debugger
        is started first.

        Args:
            request_item: The query to execute.

        Returns:
            The processed result on success, or the wrapped error on failure.
        """
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id))
        args, kwargs = parse_request_item(request_item)

        # perf_counter() is monotonic, so the measured latency cannot be
        # skewed (or go negative) when the system wall clock is adjusted.
        start = time.perf_counter()
        method_to_call = None
        try:
            # TODO(simon): Split this section out when invoke_batch is removed.
            if self.config.internal_metadata.is_asgi_app:
                request: Request = args[0]
                sender = ASGIHTTPSender()
                await self.callable._serve_asgi_app(
                    request.scope,
                    request._receive,
                    sender,
                )
                result = sender.build_starlette_response()
            else:
                method_to_call = sync_to_async(
                    self.get_runner_method(request_item))
                result = await method_to_call(*args, **kwargs)
            result = await self.ensure_serializable_response(result)
            self.request_counter.inc()
        except Exception as e:
            import os
            if "RAY_PDB" in os.environ:
                ray.util.pdb.post_mortem()
            # method_to_call stays None on the ASGI path or when the failure
            # happened before the runner method was resolved.
            function_name = "unknown"
            if method_to_call is not None:
                function_name = method_to_call.__name__
            result = wrap_to_ray_error(function_name, e)
            self.error_counter.inc()

        # Latency is recorded for both successful and failed requests.
        latency_ms = (time.perf_counter() - start) * 1000
        self.processing_latency_tracker.observe(latency_ms)

        return result