Example #1
0
    def __init__(self, backend_tag: str, replica_tag: str, _callable: Callable,
                 backend_config: BackendConfig, is_function: bool) -> None:
        """Set up replica state, register metrics, and schedule the main loop.

        Args:
            backend_tag: Stored as ``self.backend_tag`` and used as the
                "backend" label value when the restart counter is recorded.
            replica_tag: Stored as ``self.replica_tag`` and used as the
                "replica_tag" label value when the restart counter is
                recorded.
            _callable: Stored as ``self.callable``.
            backend_config: Stored as ``self.config``; its ``max_batch_size``
                and ``batch_wait_timeout`` are passed to the batch queue.
            is_function: Stored as ``self.is_function``.
        """
        self.backend_tag = backend_tag
        self.replica_tag = replica_tag
        self.callable = _callable
        self.is_function = is_function

        self.config = backend_config
        # A falsy max_batch_size (e.g. None or 0) falls back to a batch
        # size of 1.
        self.batch_queue = BatchQueue(self.config.max_batch_size or 1,
                                      self.config.batch_wait_timeout)

        self.num_ongoing_requests = 0

        # -- Metrics Registration -- #
        self.request_counter = metrics.Count(
            "backend_request_counter", ("Number of queries that have been "
                                        "processed in this replica"),
            "requests", ["backend"])
        self.error_counter = metrics.Count("backend_error_counter",
                                           ("Number of exceptions that have "
                                            "occurred in the backend"),
                                           "errors", ["backend"])
        self.restart_counter = metrics.Count(
            "backend_worker_starts",
            ("The number of time this replica workers "
             "has been restarted due to failure."), "restarts",
            ["backend", "replica_tag"])

        self.queuing_latency_tracker = metrics.Histogram(
            "backend_queuing_latency_ms",
            ("The latency for queries waiting in the replica's queue "
             "waiting to be processed or batched."), "ms",
            DEFAULT_LATENCY_BUCKET_MS, ["backend", "replica_tag"])
        self.processing_latency_tracker = metrics.Histogram(
            "backend_processing_latency_ms",
            "The latency for queries to be processed", "ms",
            DEFAULT_LATENCY_BUCKET_MS,
            ["backend", "replica_tag", "batch_size"])
        self.num_queued_items = metrics.Gauge(
            "replica_queued_queries",
            "Current number of queries queued in the the backend replicas",
            "requests", ["backend", "replica_tag"])
        self.num_processing_items = metrics.Gauge(
            "replica_processing_queries",
            "Current number of queries being processed", "requests",
            ["backend", "replica_tag"])

        # Every construction records one start for this replica.
        self.restart_counter.record(1, {
            "backend": self.backend_tag,
            "replica_tag": self.replica_tag
        })

        # Keep a strong reference to the task: the event loop only holds
        # weak references to tasks, so an unreferenced task may be garbage
        # collected before it finishes.
        self._main_loop_task = asyncio.get_event_loop().create_task(
            self.main_loop())
Example #2
0
    async def setup(self, name, controller_name):
        """Initialize router state, restore it from the controller, and
        schedule queue-length reporting.

        Args:
            name: Stored as ``self.name``.
            controller_name: Actor name passed to ``ray.get_actor`` to obtain
                the controller handle.
        """
        # Note: Several queues are used in the router
        # - When a request comes in, it's placed inside its corresponding
        #   endpoint_queue.
        # - The endpoint_queue is dequeued during the flush operation, which
        #   moves the queries to the backend buffer_queue. Here we match a
        #   request for an endpoint to a backend given some policy.
        # - The worker_queue is used to collect idle actor handles. These
        #   handles are dequeued during the second stage of the flush
        #   operation, which assigns queries in buffer_queue to actor handles.

        self.name = name

        # -- Queues -- #

        # endpoint_name -> request queue
        # We use FIFO (left to right) ordering. The new items should be added
        # using appendleft. Old items should be removed via pop().
        self.endpoint_queues: DefaultDict[str, deque[Query]] = defaultdict(
            deque)
        # backend_name -> worker replica tag queue
        self.worker_queues: DefaultDict[str, deque[str]] = defaultdict(deque)
        # backend_name -> worker payload queue
        self.backend_queues = defaultdict(deque)

        # -- Metadata -- #

        # endpoint_name -> traffic_policy
        self.traffic = dict()
        # backend_name -> backend_config
        self.backend_info = dict()
        # replica tag -> worker_handle
        self.replicas = dict()
        # backend_name -> replica_tag -> concurrent queries counter
        self.queries_counter = defaultdict(lambda: defaultdict(int))

        # -- Synchronization -- #

        # This lock guarantees that only one flush operation can happen at a
        # time. Without the lock, multiple flush operations can pop from the
        # same buffer_queue and worker_queue and create a deadlock. For
        # example, one operation holding the only query and the other flush
        # operation holding the only idle replica. Additionally, allowing only
        # one flush operation at a time simplifies design overhead for custom
        # queuing and batching policies.
        self.flush_lock = asyncio.Lock()

        # -- State Restoration -- #
        # Fetch the worker handles, traffic policies, and backend configs from
        # the controller. We use a "pull-based" approach instead of pushing
        # them from the controller so that the router can transparently recover
        # from failure.
        # NOTE(review): ray.get blocks until the result is available, so no
        # other coroutine can run on this event loop during these calls --
        # confirm that blocking during setup is intended.
        self.controller = ray.get_actor(controller_name)

        traffic_policies = ray.get(
            self.controller.get_traffic_policies.remote())
        for endpoint, traffic_policy in traffic_policies.items():
            await self.set_traffic(endpoint, traffic_policy)

        backend_dict = ray.get(self.controller.get_all_worker_handles.remote())
        for backend_tag, replica_dict in backend_dict.items():
            for replica_tag, worker in replica_dict.items():
                await self.add_new_worker(backend_tag, replica_tag, worker)

        backend_configs = ray.get(self.controller.get_backend_configs.remote())
        for backend, backend_config in backend_configs.items():
            await self.set_backend_config(backend, backend_config)

        # -- Metrics Registration -- #
        self.num_router_requests = metrics.Count(
            "num_router_requests",
            "Number of requests processed by the router.", "requests",
            ["endpoint"])
        self.num_error_endpoint_requests = metrics.Count(
            "num_error_endpoint_requests",
            ("Number of requests that errored when getting results "
             "for the endpoint."), "requests", ["endpoint"])
        self.num_error_backend_requests = metrics.Count(
            "num_error_backend_requests",
            ("Number of requests that errored when getting result "
             "from the backend."), "requests", ["backend"])

        self.backend_queue_size = metrics.Gauge(
            "backend_queued_queries",
            "Current number of queries queued in the router for a backend",
            "requests", ["backend"])

        # Keep a strong reference to the task: the event loop only holds
        # weak references to tasks, so an unreferenced task may be garbage
        # collected before it finishes.
        self._report_queue_lengths_task = (
            asyncio.get_event_loop().create_task(
                self.report_queue_lengths()))