def __init__(self, name, replica_tag, _callable, is_function):
    """Record the replica's identity and callable, then register metrics.

    Args:
        name: Stored as ``self.name`` and used as the default ``backend``
            label of the metric client.
        replica_tag: Stored as ``self.replica_tag`` and used as the
            ``replica_tag`` label value on the restart counter.
        _callable: Stored as ``self.callable``.
        is_function: Stored as ``self.is_function``.
    """
    self.name = name
    self.replica_tag = replica_tag
    self.callable = _callable
    self.is_function = is_function

    # Every counter created through this client carries the backend label.
    backend_labels = {"backend": self.name}
    client = MetricClient.connect_from_serve(default_labels=backend_labels)
    self.metric_client = client

    self.request_counter = client.new_counter(
        "backend_request_counter",
        description="Number of queries that have been processed in this replica",
    )
    self.error_counter = client.new_counter(
        "backend_error_counter",
        description="Number of exceptions that have occurred in the backend",
    )

    restarts = client.new_counter(
        "backend_worker_starts",
        description=("The number of time this replica workers has been "
                     "restarted due to failure."),
        label_names=("replica_tag", ),
    )
    self.restart_counter = restarts
    # Constructing the object counts as one start for this replica tag.
    restarts.labels(replica_tag=self.replica_tag).add()
async def fetch_config_from_master(self):
    """Pull the proxy configuration from the master actor and set up metrics.

    Populates ``self.route_table``, ``self.router_handle``,
    ``self.metric_exporter``, ``self.metric_client`` and
    ``self.request_counter``. Ray must already be initialized.
    """
    assert ray.is_initialized()
    master_actor = ray.util.get_actor(SERVE_MASTER_NAME)

    # The reply unpacks into the route table plus a one-element sequence
    # holding the router handle.
    proxy_config = await retry_actor_failures_async(
        master_actor.get_http_proxy_config)
    self.route_table, (self.router_handle, ) = proxy_config

    # The exporter is required to return results for /-/metrics endpoint.
    # Its reply is likewise a one-element sequence.
    exporter_reply = await retry_actor_failures_async(
        master_actor.get_metric_exporter)
    (self.metric_exporter, ) = exporter_reply

    client = MetricClient.connect_from_serve()
    self.metric_client = client
    self.request_counter = client.new_counter(
        "num_http_requests",
        description="The number of requests processed",
        label_names=("route", ),
    )
async def __init__(self):
    """Build the router's queues and metadata, then sync with the master.

    State is pulled from the master actor (traffic policies, worker handles,
    backend configs) and applied through ``set_traffic``, ``add_new_worker``
    and ``set_backend_config``. The router's metric counters are created
    last.
    """
    # -- Queues -- #
    # A request passes through several queues inside the router:
    #   1. On arrival it is placed in the queue of its endpoint.
    #   2. A flush operation dequeues it from the endpoint queue and moves it
    #      to a backend buffer queue; this is where a request for an endpoint
    #      is matched to a backend according to some policy.
    #   3. Worker queues collect idle actor handles. The second stage of the
    #      flush dequeues those handles and assigns buffered queries to them.

    # endpoint_name -> request queue
    self.endpoint_queues: DefaultDict[asyncio.Queue[Query]] = defaultdict(
        asyncio.Queue)
    # backend_name -> worker request queue
    self.worker_queues: DefaultDict[asyncio.Queue[
        ray.actor.ActorHandle]] = defaultdict(asyncio.Queue)
    # backend_name -> worker payload queue
    self.backend_queues = defaultdict(blist.sortedlist)

    # -- Metadata -- #
    # endpoint_name -> traffic_policy
    self.traffic = {}
    # backend_name -> backend_config
    self.backend_info = {}
    # replica tag -> worker_handle
    self.replicas = {}

    # -- Synchronization -- #
    # This lock guarantees that only one flush operation runs at a time.
    # Without it, concurrent flushes could pop from the same buffer queue and
    # worker queue and deadlock: for example, one flush holding the only
    # query while another holds the only idle replica. Serializing flushes
    # also simplifies the design of custom queuing and batching policies.
    self.flush_lock = asyncio.Lock()

    # Worker handles, traffic policies and backend configs are fetched from
    # the master actor. A "pull-based" approach is used instead of having
    # the master push them, so the router can transparently recover from
    # failure.
    ray.serve.init()
    master = ray.serve.api._get_master_actor()

    policies_by_endpoint = retry_actor_failures(master.get_traffic_policies)
    for endpoint_name, policy in policies_by_endpoint.items():
        await self.set_traffic(endpoint_name, policy)

    handles_by_backend = retry_actor_failures(master.get_all_worker_handles)
    for backend_name, tagged_handles in handles_by_backend.items():
        for tag, handle in tagged_handles.items():
            await self.add_new_worker(backend_name, tag, handle)

    configs_by_backend = retry_actor_failures(master.get_backend_configs)
    for backend_name, config in configs_by_backend.items():
        await self.set_backend_config(backend_name, config)

    # -- Metrics -- #
    client = MetricClient.connect_from_serve()
    self.metric_client = client
    self.num_router_requests = client.new_counter(
        "num_router_requests",
        description="Number of requests processed by the router.",
        label_names=("endpoint", ),
    )
    self.num_error_endpoint_request = client.new_counter(
        "num_error_endpoint_requests",
        description="Number of requests errored when getting result for endpoint.",
        label_names=("endpoint", ),
    )
    self.num_error_backend_request = client.new_counter(
        "num_error_backend_requests",
        description="Number of requests errored when getting result from backend.",
        label_names=("backend", ),
    )