Esempio n. 1
0
    async def fetch_config_from_master(self):
        """Pull the HTTP proxy config and metric exporter from the master.

        Sets ``route_table``, ``router_handle``, ``metric_exporter``,
        ``metric_client`` and ``request_counter`` on this instance.
        """
        assert ray.is_initialized()
        master_actor = serve.api._get_master_actor()

        # The reply is a pair: the route table, then a one-element sequence
        # holding the router handle.
        proxy_config = await master_actor.get_http_proxy_config.remote()
        self.route_table, (self.router_handle, ) = proxy_config

        # Serving the /-/metrics endpoint requires the exporter; it also
        # arrives wrapped in a one-element sequence.
        exporter_reply = await master_actor.get_metric_exporter.remote()
        (self.metric_exporter, ) = exporter_reply

        self.metric_client = MetricClient(self.metric_exporter)
        make_counter = self.metric_client.new_counter
        self.request_counter = make_counter(
            "num_http_requests",
            description="The number of requests processed",
            label_names=("route", ),
        )
Esempio n. 2
0
    async def fetch_config_from_controller(self, instance_name=None):
        """Pull routing config and metrics from the controller, then start
        an in-process router.

        Sets ``route_table``, ``metric_exporter``, ``metric_client``,
        ``request_counter`` and ``router`` on this instance.

        Args:
            instance_name: Passed through to ``Router.setup``.
        """
        assert ray.is_initialized()
        controller_actor = serve.api._get_controller()

        self.route_table = await controller_actor.get_router_config.remote()

        # Serving the /-/metrics endpoint requires the exporter; the reply
        # is unpacked as a one-element sequence.
        exporter_reply = await controller_actor.get_metric_exporter.remote()
        (self.metric_exporter, ) = exporter_reply

        self.metric_client = MetricClient(self.metric_exporter)
        make_counter = self.metric_client.new_counter
        self.request_counter = make_counter(
            "num_http_requests",
            description="The number of requests processed",
            label_names=("route", ),
        )

        # The router is bound to the attribute first and set up afterwards.
        self.router = Router()
        await self.router.setup(instance_name)
Esempio n. 3
0
        def __init__(self,
                     backend_tag,
                     replica_tag,
                     init_args,
                     instance_name=None):
            """Connect to serve and build the worker for this replica.

            Args:
                backend_tag: Backend identifier; also used as the default
                    ``backend`` metric label.
                replica_tag: Identifier forwarded to ``RayServeWorker``.
                init_args: Positional arguments used to instantiate
                    ``func_or_class`` when it is not a function.
                instance_name: Passed to ``serve.init(name=...)``.
            """
            serve.init(name=instance_name)

            # A function is served as-is; anything else is instantiated
            # with the supplied init args.
            _callable = (func_or_class
                         if is_function else func_or_class(*init_args))

            master_actor = serve.api._get_master_actor()
            # The exporter reply is unpacked as a one-element sequence.
            exporter_reply = retry_actor_failures(
                master_actor.get_metric_exporter)
            (exporter, ) = exporter_reply

            labelled_client = MetricClient(
                exporter, default_labels={"backend": backend_tag})
            self.backend = RayServeWorker(
                backend_tag,
                replica_tag,
                _callable,
                is_function,
                labelled_client,
            )
Esempio n. 4
0
        def __init__(self,
                     backend_tag,
                     replica_tag,
                     init_args,
                     backend_config: BackendConfig,
                     instance_name=None):
            """Connect to serve and build the worker for this replica.

            Args:
                backend_tag: Backend identifier; also used as the default
                    ``backend`` metric label.
                replica_tag: Identifier forwarded to ``RayServeWorker``.
                init_args: Positional arguments used to instantiate
                    ``func_or_class`` when it is not a function.
                backend_config: Configuration forwarded to
                    ``RayServeWorker``.
                instance_name: Passed to ``serve.init(name=...)``.
            """
            serve.init(name=instance_name)

            # A function is served as-is; anything else is instantiated
            # with the supplied init args.
            _callable = (func_or_class
                         if is_function else func_or_class(*init_args))

            controller_actor = serve.api._get_controller()
            # The exporter reply is unpacked as a one-element sequence.
            exporter_ref = controller_actor.get_metric_exporter.remote()
            (exporter, ) = ray.get(exporter_ref)

            labelled_client = MetricClient(
                exporter, default_labels={"backend": backend_tag})
            self.backend = RayServeWorker(
                backend_tag,
                replica_tag,
                _callable,
                backend_config,
                is_function,
                labelled_client,
            )
Esempio n. 5
0
    async def setup(self, instance_name=None):
        """Initialize router state and restore it from the controller.

        Creates the queues, metadata tables and the flush lock, then pulls
        traffic policies, worker handles and backend configs from the
        controller and finally registers the router's metric counters.

        Controller replies are awaited instead of fetched with a blocking
        ``ray.get`` so that the event loop running this coroutine is not
        stalled while the remote calls are in flight.

        Args:
            instance_name: Passed to ``serve.init(name=...)``.
        """
        # Note: Several queues are used in the router
        # - When a request comes in, it's placed inside its corresponding
        #   endpoint_queue.
        # - The endpoint_queue is dequeued during flush operation, which moves
        #   the queries to backend buffer_queue. Here we match a request
        #   for an endpoint to a backend given some policy.
        # - The worker_queue is used to collect idle actor handles. These
        #   handles are dequeued during the second stage of flush operation,
        #   which assigns queries in buffer_queue to actor handles.

        # -- Queues -- #

        # endpoint_name -> request queue
        # We use FIFO (left to right) ordering. The new items should be added
        # using appendleft. Old items should be removed via pop().
        self.endpoint_queues: DefaultDict[str, deque[Query]] = defaultdict(
            deque)
        # backend_name -> worker replica tag queue
        self.worker_queues: DefaultDict[str, deque[str]] = defaultdict(deque)
        # backend_name -> worker payload queue
        self.backend_queues = defaultdict(blist.sortedlist)

        # -- Metadata -- #

        # endpoint_name -> traffic_policy
        self.traffic = dict()
        # backend_name -> backend_config
        self.backend_info = dict()
        # replica tag -> worker_handle
        self.replicas = dict()
        # replica_tag -> concurrent queries counter
        self.queries_counter = defaultdict(lambda: 0)

        # -- Synchronization -- #

        # This lock guarantees that only one flush operation can happen at a
        # time. Without the lock, multiple flush operations can pop from the
        # same buffer_queue and worker_queue and create deadlock. For example,
        # an operation holding the only query and the other flush operation
        # holding the only idle replica. Additionally, allowing only one flush
        # operation at a time simplifies design overhead for custom queuing and
        # batching policies.
        self.flush_lock = asyncio.Lock()

        # -- State Restoration -- #
        # Fetch the worker handles, traffic policies, and backend configs from
        # the controller. We use a "pull-based" approach instead of pushing
        # them from the controller so that the router can transparently recover
        # from failure.
        serve.init(name=instance_name)
        controller = serve.api._get_controller()

        traffic_policies = await controller.get_traffic_policies.remote()
        for endpoint, traffic_policy in traffic_policies.items():
            await self.set_traffic(endpoint, traffic_policy)

        backend_dict = await controller.get_all_worker_handles.remote()
        for backend_tag, replica_dict in backend_dict.items():
            for replica_tag, worker in replica_dict.items():
                await self.add_new_worker(backend_tag, replica_tag, worker)

        backend_configs = await controller.get_backend_configs.remote()
        for backend, backend_config in backend_configs.items():
            await self.set_backend_config(backend, backend_config)

        # -- Metric Registration -- #
        # The exporter reply is unpacked as a one-element sequence.
        [metric_exporter] = await controller.get_metric_exporter.remote()
        self.metric_client = MetricClient(metric_exporter)
        self.num_router_requests = self.metric_client.new_counter(
            "num_router_requests",
            description="Number of requests processed by the router.",
            label_names=("endpoint", ))
        self.num_error_endpoint_request = self.metric_client.new_counter(
            "num_error_endpoint_requests",
            description=("Number of requests errored when getting result "
                         "for endpoint."),
            label_names=("endpoint", ))
        self.num_error_backend_request = self.metric_client.new_counter(
            "num_error_backend_requests",
            description=("Number of requests errored when getting result "
                         "from backend."),
            label_names=("backend", ))