async def invoke_single(self, request_item: Query) -> Any:
    """Run a single request through the user's method and return the result.

    User-code exceptions are captured, wrapped via ``wrap_to_ray_error``,
    and returned instead of raised; processing latency is observed on both
    the success and failure paths.
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    args, kwargs = parse_request_item(request_item)
    start_time = time.time()
    wrapped_method = None
    try:
        runner = self.get_runner_method(request_item)
        wrapped_method = sync_to_async(runner)
        if len(inspect.signature(runner).parameters) > 0:
            result = await wrapped_method(*args, **kwargs)
        else:
            # The method doesn't take in anything, including the request
            # information, so we pass nothing into it.
            result = await wrapped_method()
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as exc:
        import os
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # wrapped_method may still be None if method resolution failed.
        name = "unknown" if wrapped_method is None else wrapped_method.__name__
        result = wrap_to_ray_error(name, exc)
        self.error_counter.inc()
    self.processing_latency_tracker.observe((time.time() - start_time) * 1000)
    return result
async def invoke_single(self, request_item: Query) -> Any:
    """Execute one request on this replica and return a serializable result.

    Exceptions raised by user code are wrapped and returned rather than
    propagated; latency is always recorded.
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    args, kwargs = parse_request_item(request_item)
    start_time = time.time()
    async_method = None
    try:
        async_method = sync_to_async(self.get_runner_method(request_item))
        result = await async_method(*args, **kwargs)
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as exc:
        import os
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # async_method is None when get_runner_method itself failed.
        name = "unknown" if async_method is None else async_method.__name__
        result = wrap_to_ray_error(name, exc)
        self.error_counter.inc()
    self.processing_latency_tracker.observe((time.time() - start_time) * 1000)
    return result
async def invoke_batch(self, request_item_list: List[Query]) -> List[Any]:
    """Execute a batch of queries with a single call to the user method.

    All queries in the batch must target the same method, and the method
    must return an ordered iterable whose length equals the batch size.
    On any failure, every slot of the returned list holds the wrapped
    error so each caller receives a response.
    """
    args = []
    call_methods = set()
    batch_size = len(request_item_list)

    # Construct the batch of requests
    for item in request_item_list:
        logger.debug("Replica {} started executing request {}".format(
            self.replica_tag, item.metadata.request_id))
        args.append(parse_request_item(item))
        call_methods.add(self.get_runner_method(item))

    timing_start = time.time()
    # Initialize before the try block: if the mixed-methods check below
    # raises, the except handler must not reference an unbound name
    # (previously this caused an UnboundLocalError masking the real error).
    call_method = None
    try:
        if len(call_methods) != 1:
            raise RayServeException(
                f"Queries contain mixed calling methods: {call_methods}. "
                "Please only send the same type of requests in batching "
                "mode.")
        self.request_counter.inc(batch_size)
        call_method = sync_to_async(call_methods.pop())
        result_list = await call_method(args)

        if not isinstance(result_list, Iterable) or isinstance(
                result_list, (dict, set)):
            error_message = ("RayServe expects an ordered iterable object "
                             "but the replica returned a {}".format(
                                 type(result_list)))
            raise RayServeException(error_message)

        # Normalize the result into a list type. This operation is fast
        # in Python because it doesn't copy anything.
        result_list = list(result_list)

        if (len(result_list) != batch_size):
            error_message = ("Worker doesn't preserve batch size. The "
                             "input has length {} but the returned list "
                             "has length {}. Please return a list of "
                             "results with length equal to the batch size"
                             ".".format(batch_size, len(result_list)))
            raise RayServeException(error_message)
        for i, result in enumerate(result_list):
            result_list[i] = (await
                              self.ensure_serializable_response(result))
    except Exception as e:
        # Fall back to "unknown" when the failure happened before the
        # batch method was resolved (consistent with invoke_single).
        function_name = ("unknown"
                         if call_method is None else call_method.__name__)
        wrapped_exception = wrap_to_ray_error(function_name, e)
        self.error_counter.inc()
        result_list = [wrapped_exception for _ in range(batch_size)]

    latency_ms = (time.time() - timing_start) * 1000
    self.processing_latency_tracker.observe(
        latency_ms, tags={"batch_size": str(batch_size)})

    return result_list
async def reconfigure(self, user_config) -> None:
    """Forward a new user_config to the backend's reconfigure hook.

    No-op when user_config is falsy. Raises ValueError for function
    backends and RayServeException when the hook method is missing.
    """
    if not user_config:
        return
    if self.is_function:
        raise ValueError("backend_def must be a class to use user_config")
    if not hasattr(self.callable, BACKEND_RECONFIGURE_METHOD):
        raise RayServeException("user_config specified but backend " +
                                self.backend_tag + " missing " +
                                BACKEND_RECONFIGURE_METHOD + " method")
    reconfigure_fn = sync_to_async(
        getattr(self.callable, BACKEND_RECONFIGURE_METHOD))
    await reconfigure_fn(user_config)
async def reconfigure(self, user_config: Any):
    """Apply a new user_config and bump the deployment version to match.

    Raises ValueError for function deployments and RayServeException when
    the deployment class lacks the reconfigure hook.
    """
    self.user_config = user_config
    self.version = DeploymentVersion(self.version.code_version,
                                     user_config=user_config)
    if self.is_function:
        raise ValueError("deployment_def must be a class to use user_config")
    if not hasattr(self.callable, RECONFIGURE_METHOD):
        raise RayServeException("user_config specified but deployment " +
                                self.deployment_name + " missing " +
                                RECONFIGURE_METHOD + " method")
    reconfigure_fn = sync_to_async(getattr(self.callable, RECONFIGURE_METHOD))
    await reconfigure_fn(user_config)
async def invoke_single(self, request_item: Query) -> Tuple[Any, bool]:
    """Executes the provided request on this replica.

    Returns the user-provided output and a boolean indicating if the
    request succeeded (user code didn't raise an exception).
    """
    logger.debug(
        "Replica {} started executing request {}".format(
            self.replica_tag, request_item.metadata.request_id
        )
    )
    args, kwargs = parse_request_item(request_item)

    callee = None
    success = True
    try:
        runner = self.get_runner_method(request_item)
        callee = sync_to_async(runner)
        if len(inspect.signature(runner).parameters) > 0:
            result = await callee(*args, **kwargs)
        elif len(args) == 1 and isinstance(
                args[0], starlette.requests.Request):
            # HTTP entry point packs a lone starlette Request into args
            # even for zero-parameter methods, so drop it and call bare.
            result = await callee()
        else:
            # Python entry point: forward args as-is; a signature mismatch
            # will raise if the caller passed non-empty args to a
            # zero-parameter method.
            result = await callee(*args, **kwargs)
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as e:
        logger.exception(f"Request failed due to {type(e).__name__}:")
        success = False
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # callee stays None when method resolution itself failed.
        name = "unknown" if callee is None else callee.__name__
        result = wrap_to_ray_error(name, e)
        self.error_counter.inc()

    return result, success
async def invoke_single(self, request_item: Query) -> Any:
    """Run one request either through the wrapped ASGI app or the user method.

    Errors from user code are wrapped and returned; latency is observed on
    every path with a fixed batch_size tag of "1".
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    request_arg = parse_request_item(request_item)
    start_ts = time.time()
    runner = None
    try:
        # TODO(simon): Split this section out when invoke_batch is removed.
        if self.config.internal_metadata.is_asgi_app:
            request: Request = request_arg
            scope = request.scope
            root_path = self.config.internal_metadata.path_prefix
            # The incoming scope["path"] contains prefixed path and it
            # won't be stripped by FastAPI.
            request.scope["path"] = scope["path"].replace(root_path, "", 1)
            # root_path is used such that the reverse look up and
            # redirection works.
            request.scope["root_path"] = root_path
            sender = ASGIHTTPSender()
            await self.callable._serve_asgi_app(
                request.scope,
                request._receive,
                sender,
            )
            result = sender.build_starlette_response()
        else:
            runner = sync_to_async(self.get_runner_method(request_item))
            result = await runner(request_arg)
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as err:
        import os
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # runner is None on the ASGI path and when resolution failed.
        fn_name = "unknown" if runner is None else runner.__name__
        result = wrap_to_ray_error(fn_name, err)
        self.error_counter.inc()
    self.processing_latency_tracker.observe((time.time() - start_ts) * 1000,
                                            tags={"batch_size": "1"})
    return result
async def invoke_single(self, request_item: Query) -> Any:
    """Execute one request, dispatching to the ASGI app or the user method.

    User-code failures are wrapped and returned; latency is always observed.
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    args, kwargs = parse_request_item(request_item)
    start_ts = time.time()
    runner = None
    try:
        # TODO(simon): Split this section out when invoke_batch is removed.
        if self.config.internal_metadata.is_asgi_app:
            # ASGI path: replay the raw request through the app and
            # rebuild a Starlette response from the captured sends.
            request: Request = args[0]
            sender = ASGIHTTPSender()
            await self.callable._serve_asgi_app(
                request.scope,
                request._receive,
                sender,
            )
            result = sender.build_starlette_response()
        else:
            runner = sync_to_async(self.get_runner_method(request_item))
            result = await runner(*args, **kwargs)
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as err:
        import os
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # runner is None on the ASGI path and when resolution failed.
        fn_name = "unknown" if runner is None else runner.__name__
        result = wrap_to_ray_error(fn_name, err)
        self.error_counter.inc()
    self.processing_latency_tracker.observe((time.time() - start_ts) * 1000)
    return result
async def invoke_single(self, request_item: Query) -> Any:
    """Execute a single request and return the result or a wrapped error.

    Latency is observed on both success and failure, tagged with a fixed
    batch_size of "1".
    """
    logger.debug("Replica {} started executing request {}".format(
        self.replica_tag, request_item.metadata.request_id))
    arg = parse_request_item(request_item)
    start = time.time()
    method_to_call = None
    try:
        # Resolve the runner method inside the try block so that a bad
        # method name is wrapped and counted as an error instead of
        # escaping unhandled and skipping the latency/error metrics
        # (consistent with the other invoke_single variants).
        method_to_call = sync_to_async(self.get_runner_method(request_item))
        result = await method_to_call(arg)
        result = await self.ensure_serializable_response(result)
        self.request_counter.inc()
    except Exception as e:
        import os
        if "RAY_PDB" in os.environ:
            ray.util.pdb.post_mortem()
        # method_to_call is None when resolution itself failed.
        function_name = ("unknown" if method_to_call is None else
                         method_to_call.__name__)
        result = wrap_to_ray_error(function_name, e)
        self.error_counter.inc()
    latency_ms = (time.time() - start) * 1000
    self.processing_latency_tracker.observe(
        latency_ms, tags={"batch_size": "1"})
    return result
def __init__(
    self,
    _callable: Callable,
    deployment_name: str,
    replica_tag: ReplicaTag,
    deployment_config: DeploymentConfig,
    user_config: Any,
    version: DeploymentVersion,
    is_function: bool,
    controller_handle: ActorHandle,
) -> None:
    """Wrap the user callable and wire up metrics, autoscaling, and logging."""
    self.deployment_config = deployment_config
    self.deployment_name = deployment_name
    self.replica_tag = replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.user_config = user_config
    self.version = version
    self.rwlock = aiorwlock.RWLock()

    # Fall back to a no-op health check when the user didn't define one.
    user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
    if not callable(user_health_check):

        def user_health_check():
            pass

    self.user_health_check = sync_to_async(user_health_check)

    self.num_ongoing_requests = 0

    # All metrics below are tagged with this replica's identity.
    replica_tags = {
        "deployment": self.deployment_name,
        "replica": self.replica_tag,
    }

    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=("The number of queries that have been processed in "
                     "this replica."),
        tag_keys=("deployment", "replica"),
    )
    self.request_counter.set_default_tags(dict(replica_tags))

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=("The number of exceptions that have occurred in this "
                     "replica."),
        tag_keys=("deployment", "replica"),
    )
    self.error_counter.set_default_tags(dict(replica_tags))

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=("The number of times this replica has been restarted "
                     "due to failure."),
        tag_keys=("deployment", "replica"),
    )
    self.restart_counter.set_default_tags(dict(replica_tags))

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"),
    )
    self.processing_latency_tracker.set_default_tags(dict(replica_tags))

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"),
    )
    self.num_processing_items.set_default_tags(dict(replica_tags))

    self.restart_counter.inc()

    self._shutdown_wait_loop_s = (
        deployment_config.graceful_shutdown_wait_loop_s)

    if deployment_config.autoscaling_config:
        process_remote_func = (
            controller_handle.record_autoscaling_metrics.remote)
        config = deployment_config.autoscaling_config
        start_metrics_pusher(
            interval_s=config.metrics_interval_s,
            collection_callback=self._collect_autoscaling_metrics,
            metrics_process_func=process_remote_func,
        )

    # NOTE(edoakes): we used to recommend that users use the "ray" logger
    # and tagged the logs with metadata as below. We now recommend using
    # the "ray.serve" 'component logger' (as of Ray 1.13). This is left to
    # maintain backwards compatibility with users who were using the
    # existing logger. We can consider removing it in Ray 2.0.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.deployment_name} "
                f"replica={self.replica_tag}"))
def __init__(
    self,
    _callable: Callable,
    deployment_name: str,
    replica_tag: ReplicaTag,
    deployment_config: DeploymentConfig,
    user_config: Any,
    version: DeploymentVersion,
    is_function: bool,
    controller_handle: ActorHandle,
) -> None:
    """Wrap the user callable and wire up metrics, autoscaling, and logging."""
    self.deployment_config = deployment_config
    self.deployment_name = deployment_name
    self.replica_tag = replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.user_config = user_config
    self.version = version
    self.rwlock = aiorwlock.RWLock()

    # Fall back to a no-op health check when the user didn't define one.
    user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
    if not callable(user_health_check):

        def user_health_check():
            pass

    self.user_health_check = sync_to_async(user_health_check)

    self.num_ongoing_requests = 0

    # All metrics below are tagged with this replica's identity.
    replica_tags = {
        "deployment": self.deployment_name,
        "replica": self.replica_tag,
    }

    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=("The number of queries that have been "
                     "processed in this replica."),
        tag_keys=("deployment", "replica"),
    )
    self.request_counter.set_default_tags(dict(replica_tags))

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=("The number of exceptions that have "
                     "occurred in this replica."),
        tag_keys=("deployment", "replica"),
    )
    self.error_counter.set_default_tags(dict(replica_tags))

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=("The number of times this replica "
                     "has been restarted due to failure."),
        tag_keys=("deployment", "replica"),
    )
    self.restart_counter.set_default_tags(dict(replica_tags))

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"),
    )
    self.processing_latency_tracker.set_default_tags(dict(replica_tags))

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"),
    )
    self.num_processing_items.set_default_tags(dict(replica_tags))

    self.restart_counter.inc()

    self._shutdown_wait_loop_s = (
        deployment_config.graceful_shutdown_wait_loop_s)

    if deployment_config.autoscaling_config:
        config = deployment_config.autoscaling_config
        start_metrics_pusher(
            interval_s=config.metrics_interval_s,
            collection_callback=self._collect_autoscaling_metrics,
            controller_handle=controller_handle,
        )

    # Tag "ray" logger output with this replica's identity for backwards
    # compatibility with users relying on that logger.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.deployment_name} "
                f"replica={self.replica_tag}"))