async def ensure_serializable_response(self, response: Any) -> Any:
    """Turn a streaming response into a fully built Starlette response.

    Anything that is not a ``starlette.responses.StreamingResponse`` is
    handed back untouched. A streaming response is invoked as an ASGI
    callable with an ``ASGIHTTPSender`` as its ``send`` channel, and the
    value of ``build_starlette_response()`` on that sender is returned.

    Args:
        response: The value produced by the user's callable.

    Returns:
        ``response`` itself, or the response built by the sender when
        ``response`` is a ``StreamingResponse``.
    """
    if not isinstance(response, starlette.responses.StreamingResponse):
        return response

    async def mock_receive():
        # response() polls this in a tight loop only to detect an http
        # disconnect. Returning right away would burn CPU cycles, so park
        # on an event that nobody ever sets.
        parked_forever = asyncio.Event()
        await parked_forever.wait()

    collector = ASGIHTTPSender()
    await response(scope=None, receive=mock_receive, send=collector)
    return collector.build_starlette_response()
async def invoke_single(self, request_item: Query) -> Any:
    """Execute a single request and return its result or a wrapped error.

    When ``self.config.internal_metadata.is_asgi_app`` is set, the parsed
    request is served through ``self.callable._serve_asgi_app`` and the
    response is assembled by an ``ASGIHTTPSender``. Otherwise the runner
    method for the request is wrapped with ``sync_to_async``, awaited with
    the parsed argument, and its result is passed through
    ``ensure_serializable_response``.

    Any ``Exception`` raised while handling the request is not propagated:
    it is converted with ``wrap_to_ray_error`` and returned as the result,
    and ``self.error_counter`` is incremented instead of
    ``self.request_counter``. The processing latency in milliseconds is
    observed in both cases.

    Args:
        request_item: The query to execute.

    Returns:
        The handler result, or the value of ``wrap_to_ray_error`` on failure.
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    arg = parse_request_item(request_item)

    # Durations are measured with a monotonic clock so that system clock
    # adjustments cannot skew (or make negative) the reported latency.
    start = time.perf_counter()
    method_to_call = None
    try:
        # TODO(simon): Split this section out when invoke_batch is removed.
        if self.config.internal_metadata.is_asgi_app:
            request: Request = arg
            scope = request.scope
            root_path = self.config.internal_metadata.path_prefix

            # The incoming scope["path"] contains prefixed path and it
            # won't be stripped by FastAPI. Only a *leading* prefix is
            # removed; an occurrence of the prefix further inside the
            # path is part of the route and must be left alone.
            path = scope["path"]
            if root_path and path.startswith(root_path):
                path = path[len(root_path):]
            request.scope["path"] = path
            # root_path is used such that the reverse look up and
            # redirection works.
            request.scope["root_path"] = root_path

            sender = ASGIHTTPSender()
            await self.callable._serve_asgi_app(
                request.scope,
                request._receive,
                sender,
            )
            result = sender.build_starlette_response()
        else:
            method_to_call = sync_to_async(
                self.get_runner_method(request_item))
            result = await method_to_call(arg)
            result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as e:
        import os
        # Opt-in post-mortem debugging, enabled by the RAY_PDB env var.
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # method_to_call stays None on the ASGI path or when the failure
        # happened before the runner method was resolved.
        function_name = "unknown"
        if method_to_call is not None:
            function_name = method_to_call.__name__
        result = wrap_to_ray_error(function_name, e)
        self.error_counter.inc()

    latency_ms = (time.perf_counter() - start) * 1000
    self.processing_latency_tracker.observe(latency_ms,
                                            tags={"batch_size": "1"})

    return result
async def invoke_single(self, request_item: Query) -> Any:
    """Execute a single request and return its result or a wrapped error.

    When ``self.config.internal_metadata.is_asgi_app`` is set, the first
    parsed positional argument is treated as the request and served through
    ``self.callable._serve_asgi_app``; the response is assembled by an
    ``ASGIHTTPSender``. Otherwise the runner method for the request is
    wrapped with ``sync_to_async``, awaited with the parsed ``args`` and
    ``kwargs``, and its result is passed through
    ``ensure_serializable_response``.

    Any ``Exception`` raised while handling the request is not propagated:
    it is converted with ``wrap_to_ray_error`` and returned as the result,
    and ``self.error_counter`` is incremented instead of
    ``self.request_counter``. The processing latency in milliseconds is
    observed in both cases.

    Args:
        request_item: The query to execute.

    Returns:
        The handler result, or the value of ``wrap_to_ray_error`` on failure.
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    args, kwargs = parse_request_item(request_item)

    # Durations are measured with a monotonic clock so that system clock
    # adjustments cannot skew (or make negative) the reported latency.
    start = time.perf_counter()
    method_to_call = None
    try:
        # TODO(simon): Split this section out when invoke_batch is removed.
        if self.config.internal_metadata.is_asgi_app:
            request: Request = args[0]
            sender = ASGIHTTPSender()
            await self.callable._serve_asgi_app(
                request.scope,
                request._receive,
                sender,
            )
            result = sender.build_starlette_response()
        else:
            method_to_call = sync_to_async(
                self.get_runner_method(request_item))
            result = await method_to_call(*args, **kwargs)
            result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as e:
        import os
        # Opt-in post-mortem debugging, enabled by the RAY_PDB env var.
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # method_to_call stays None on the ASGI path or when the failure
        # happened before the runner method was resolved.
        function_name = "unknown"
        if method_to_call is not None:
            function_name = method_to_call.__name__
        result = wrap_to_ray_error(function_name, e)
        self.error_counter.inc()

    latency_ms = (time.perf_counter() - start) * 1000
    self.processing_latency_tracker.observe(latency_ms)

    return result