Python Gauge Examples

Programming Language: Python

Namespace/Package Name: ray.experimental.metrics

Method/Function: Gauge

Examples at hotexamples.com: 2

Python Gauge - 2 examples found. These are the top-rated real-world Python examples of ray.experimental.metrics.Gauge, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: backend_worker.py Project: shuminghu/ray

    def __init__(self, backend_tag: str, replica_tag: str, _callable: Callable,
                 backend_config: BackendConfig, is_function: bool) -> None:
        """Store replica state, register its metrics and start the main loop.

        Args:
            backend_tag: Stored on the instance; also used as the "backend"
                label value when the restart counter is recorded below.
            replica_tag: Stored on the instance; also used as the
                "replica_tag" label value when the restart counter is
                recorded below.
            _callable: Stored as ``self.callable``.
            backend_config: Stored as ``self.config``. Its ``max_batch_size``
                and ``batch_wait_timeout`` are handed to the batch queue.
            is_function: Stored as ``self.is_function``.
        """
        self.backend_tag = backend_tag
        self.replica_tag = replica_tag
        self.callable = _callable
        self.is_function = is_function

        self.config = backend_config
        # A falsy max_batch_size (e.g. None or 0) falls back to a size of 1.
        self.batch_queue = BatchQueue(self.config.max_batch_size or 1,
                                      self.config.batch_wait_timeout)

        self.num_ongoing_requests = 0

        # -- Counters -- #
        self.request_counter = metrics.Count(
            "backend_request_counter", ("Number of queries that have been "
                                        "processed in this replica"),
            "requests", ["backend"])
        self.error_counter = metrics.Count("backend_error_counter",
                                           ("Number of exceptions that have "
                                            "occurred in the backend"),
                                           "errors", ["backend"])
        self.restart_counter = metrics.Count(
            "backend_worker_starts",
            ("The number of time this replica workers "
             "has been restarted due to failure."), "restarts",
            ["backend", "replica_tag"])

        # -- Latency histograms -- #
        self.queuing_latency_tracker = metrics.Histogram(
            "backend_queuing_latency_ms",
            ("The latency for queries waiting in the replica's queue "
             "waiting to be processed or batched."), "ms",
            DEFAULT_LATENCY_BUCKET_MS, ["backend", "replica_tag"])
        self.processing_latency_tracker = metrics.Histogram(
            "backend_processing_latency_ms",
            "The latency for queries to be processed", "ms",
            DEFAULT_LATENCY_BUCKET_MS,
            ["backend", "replica_tag", "batch_size"])

        # -- Gauges -- #
        self.num_queued_items = metrics.Gauge(
            "replica_queued_queries",
            "Current number of queries queued in the the backend replicas",
            "requests", ["backend", "replica_tag"])
        self.num_processing_items = metrics.Gauge(
            "replica_processing_queries",
            "Current number of queries being processed", "requests",
            ["backend", "replica_tag"])

        # Every construction of this object records one (re)start for this
        # backend/replica pair.
        self.restart_counter.record(1, {
            "backend": self.backend_tag,
            "replica_tag": self.replica_tag
        })

        # Keep a reference to the task: the event loop only holds weak
        # references to tasks, so an otherwise unreferenced task may be
        # garbage collected before it finishes.
        self._main_loop_task = asyncio.get_event_loop().create_task(
            self.main_loop())

Example #2

Show file

File: router.py Project: shuminghu/ray

    async def setup(self, name, controller_name):
        """Initialize the router and restore its state from the controller.

        Creates the router's queues, metadata tables and flush lock, pulls
        traffic policies, worker handles and backend configs from the
        controller actor, registers the router metrics, and finally schedules
        ``report_queue_lengths`` on the event loop.

        Args:
            name: Stored as ``self.name``.
            controller_name: Name passed to ``ray.get_actor`` to look up the
                controller actor.
        """
        # Note: Several queues are used in the router
        # - When a request comes in, it's placed inside its corresponding
        #   endpoint_queue.
        # - The endpoint_queue is dequeued during the flush operation, which
        #   moves the queries to the backend buffer_queue. Here we match a
        #   request for an endpoint to a backend given some policy.
        # - The worker_queue is used to collect idle actor handles. These
        #   handles are dequeued during the second stage of the flush
        #   operation, which assigns queries in buffer_queue to actor handles.

        self.name = name

        # -- Queues -- #

        # endpoint_name -> request queue
        # We use FIFO (left to right) ordering. The new items should be added
        # using appendleft. Old items should be removed via pop().
        self.endpoint_queues: DefaultDict[str, deque[Query]] = defaultdict(
            deque)
        # backend_name -> worker replica tag queue
        self.worker_queues: DefaultDict[str, deque[str]] = defaultdict(deque)
        # backend_name -> worker payload queue
        self.backend_queues = defaultdict(deque)

        # -- Metadata -- #

        # endpoint_name -> traffic_policy
        self.traffic = dict()
        # backend_name -> backend_config
        self.backend_info = dict()
        # replica tag -> worker_handle
        self.replicas = dict()
        # backend_name -> replica_tag -> concurrent queries counter
        self.queries_counter = defaultdict(lambda: defaultdict(int))

        # -- Synchronization -- #

        # This lock guarantees that only one flush operation can happen at a
        # time. Without the lock, multiple flush operations can pop from the
        # same buffer_queue and worker_queue and create a deadlock. For
        # example, one operation holding the only query and the other flush
        # operation holding the only idle replica. Additionally, allowing only
        # one flush operation at a time simplifies design overhead for custom
        # queuing and batching policies.
        self.flush_lock = asyncio.Lock()

        # -- State Restoration -- #
        # Fetch the worker handles, traffic policies, and backend configs from
        # the controller. We use a "pull-based" approach instead of pushing
        # them from the controller so that the router can transparently recover
        # from failure.
        # NOTE(review): ray.get() is called without await inside this
        # coroutine; presumably it blocks the event loop until each result
        # arrives -- confirm that is acceptable during setup.
        self.controller = ray.get_actor(controller_name)

        traffic_policies = ray.get(
            self.controller.get_traffic_policies.remote())
        for endpoint, traffic_policy in traffic_policies.items():
            await self.set_traffic(endpoint, traffic_policy)

        backend_dict = ray.get(self.controller.get_all_worker_handles.remote())
        for backend_tag, replica_dict in backend_dict.items():
            for replica_tag, worker in replica_dict.items():
                await self.add_new_worker(backend_tag, replica_tag, worker)

        backend_configs = ray.get(self.controller.get_backend_configs.remote())
        for backend, backend_config in backend_configs.items():
            await self.set_backend_config(backend, backend_config)

        # -- Metrics Registration -- #
        self.num_router_requests = metrics.Count(
            "num_router_requests",
            "Number of requests processed by the router.", "requests",
            ["endpoint"])
        self.num_error_endpoint_requests = metrics.Count(
            "num_error_endpoint_requests",
            ("Number of requests that errored when getting results "
             "for the endpoint."), "requests", ["endpoint"])
        self.num_error_backend_requests = metrics.Count(
            "num_error_backend_requests",
            ("Number of requests that errored when getting result "
             "from the backend."), "requests", ["backend"])

        self.backend_queue_size = metrics.Gauge(
            "backend_queued_queries",
            "Current number of queries queued in the router for a backend",
            "requests", ["backend"])

        # Keep a reference to the task: the event loop only holds weak
        # references to tasks, so an otherwise unreferenced task may be
        # garbage collected before it finishes.
        self._report_queue_lengths_task = (
            asyncio.get_event_loop().create_task(self.report_queue_lengths()))