Beispiel #1
0
    def __init__(
        self,
        controller_name: str,
        controller_namespace: str,
    ):
        """Wire up routing state, the route-table subscription and metrics.

        Args:
            controller_name: Name of the controller actor to attach to.
            controller_namespace: Namespace used when looking up that actor.
        """
        # Register the controller in the internal replica context first, so
        # that serve connects to the controller instance this proxy runs in.
        ray.serve.context.set_internal_replica_context(
            None, None, controller_name, controller_namespace, None)

        # Only kept around to display the route table.
        self.route_info: Dict[str, EndpointTag] = {}

        def fetch_handle(name):
            # Resolve a handle through the global client on demand.
            client = serve.context.get_global_client()
            return client.get_handle(
                name,
                sync=False,
                missing_ok=True,
                _internal_pickled_http_request=True,
            )

        self.prefix_router = LongestPrefixRouter(fetch_handle)

        # Subscribe to route table changes published by the controller.
        controller = ray.get_actor(
            controller_name, namespace=controller_namespace)
        route_callbacks = {
            LongPollNamespace.ROUTE_TABLE: self._update_routes,
        }
        self.long_poll_client = LongPollClient(
            controller,
            route_callbacks,
            call_in_event_loop=asyncio.get_event_loop(),
        )

        # -- Metrics -- #
        self.request_counter = metrics.Counter(
            "serve_num_http_requests",
            tag_keys=("route", ),
            description="The number of HTTP requests processed.",
        )
        self.request_error_counter = metrics.Counter(
            "serve_num_http_error_requests",
            tag_keys=("route", "error_code"),
            description="The number of non-200 HTTP responses.",
        )
        self.deployment_request_error_counter = metrics.Counter(
            "serve_num_deployment_http_error_requests",
            tag_keys=("deployment", ),
            description=("The number of non-200 HTTP responses "
                         "returned by each deployment."),
        )
Beispiel #2
0
    def __init__(self, controller_name: str):
        """Create both routers, the route-table subscription and a counter.

        Args:
            controller_name: Name of the controller actor to attach to.
        """
        # Register the controller name in the internal replica context so
        # that serve connects to the controller instance this proxy runs in.
        ray.serve.api._set_internal_replica_context(
            None, None, controller_name, None)

        # Only kept around to display the route table.
        self.route_info: Dict[str, Tuple[EndpointTag, List[str]]] = {}

        # NOTE(edoakes): two routers coexist to stay compatible with the old
        # API. The Starlette router (holding old-API routes) is matched
        # first; anything it does not match falls through to the
        # longest-prefix router. Once the old API is deprecated the
        # Starlette router can go away.
        fallback = self._fallback_to_prefix_router
        self.starlette_router = starlette.routing.Router(default=fallback)
        self.prefix_router = LongestPrefixRouter()

        # Subscribe to route table changes published by the controller.
        controller = ray.get_actor(controller_name)
        route_callbacks = {
            LongPollNamespace.ROUTE_TABLE: self._update_routes,
        }
        self.long_poll_client = LongPollClient(
            controller,
            route_callbacks,
            call_in_event_loop=asyncio.get_event_loop(),
        )

        self.request_counter = metrics.Counter(
            "serve_num_http_requests",
            tag_keys=("route", ),
            description="The number of HTTP requests processed.",
        )
Beispiel #3
0
 def __init__(self):
     """Register ``my_counter`` and tag it with the current deployment."""
     self.my_counter = metrics.Counter(
         "my_counter",
         tag_keys=("deployment", ),
         description=("The number of excellent requests to this backend."),
     )
     # Every sample recorded on this counter carries the deployment tag.
     default_tags = {"deployment": serve.get_current_deployment()}
     self.my_counter.set_default_tags(default_tags)
Beispiel #4
0
    def __init__(
        self,
        controller_handle: ActorHandle,
        deployment_name: str,
        event_loop: asyncio.BaseEventLoop = None,
    ):
        """Build the replica set for a deployment and subscribe to updates.

        Args:
            controller_handle(ActorHandle): The controller handle that the
                long-poll client listens to.
            deployment_name(str): Deployment this router serves; also used
                as the default "deployment" metric tag.
            event_loop(asyncio.BaseEventLoop): Loop handed to the replica
                set and to the long-poll client.
        """
        replica_set = ReplicaSet(deployment_name, event_loop)
        self._event_loop = event_loop
        self._replica_set = replica_set

        # -- Metrics Registration -- #
        router_requests = metrics.Counter(
            "serve_num_router_requests",
            tag_keys=("deployment", ),
            description="The number of requests processed by the router.",
        )
        self.num_router_requests = router_requests
        router_requests.set_default_tags({"deployment": deployment_name})

        # Feed running-replica updates for this deployment to the replica set.
        replica_callbacks = {
            (LongPollNamespace.RUNNING_REPLICAS, deployment_name):
            replica_set.update_running_replicas,
        }
        self.long_poll_client = LongPollClient(
            controller_handle,
            replica_callbacks,
            call_in_event_loop=event_loop,
        )
Beispiel #5
0
    def __init__(
        self,
        controller_handle: ActorHandle,
        endpoint_tag: EndpointTag,
        loop: Optional[asyncio.BaseEventLoop] = None,
    ):
        """Router process incoming queries: choose backend, and assign replica.

        Args:
            controller_handle(ActorHandle): The controller handle.
            endpoint_tag(EndpointTag): Endpoint whose traffic policy updates
                this router subscribes to.
            loop(asyncio.BaseEventLoop): Event loop used for long-poll
                callbacks. When falsy, ``asyncio.get_event_loop()`` is used.
        """
        import sys

        self.controller = controller_handle
        self.endpoint_tag = endpoint_tag
        self.endpoint_policy: Optional[EndpointPolicy] = None
        self.backend_replicas: Dict[BackendTag, ReplicaSet] = dict()

        # asyncio.Event lost its ``loop`` parameter in Python 3.10; passing
        # it there (even as None) raises TypeError. Only pass it on older
        # interpreters, where it is still accepted.
        if sys.version_info >= (3, 10):
            self._pending_endpoint_registered = asyncio.Event()
        else:
            self._pending_endpoint_registered = asyncio.Event(loop=loop)
        self._loop = loop or asyncio.get_event_loop()

        # -- Metrics Registration -- #
        self.num_router_requests = metrics.Counter(
            "serve_num_router_requests",
            description="The number of requests processed by the router.",
            tag_keys=("endpoint", ))

        # Traffic policy updates for this endpoint are delivered to
        # _update_traffic_policy on the router's event loop.
        self.long_poll_client = LongPollClient(
            self.controller,
            {
                (LongPollNamespace.TRAFFIC_POLICIES, endpoint_tag):
                self._update_traffic_policy,
            },
            call_in_event_loop=self._loop,
        )
Beispiel #6
0
    def __init__(
        self,
        controller_handle: ActorHandle,
        endpoint_name: EndpointTag,
        handle_options: Optional[HandleOptions] = None,
        *,
        known_python_methods: Optional[List[str]] = None,
        _router: Optional[EndpointRouter] = None,
        _internal_use_serve_request: Optional[bool] = True,
        _internal_pickled_http_request: bool = False,
    ):
        """Create a handle for an endpoint and register its request counter.

        Args:
            controller_handle: The controller handle.
            endpoint_name: Endpoint this handle targets; also used as the
                "endpoint" metric tag and as the prefix of ``handle_tag``.
            handle_options: Options for this handle. A default
                ``HandleOptions()`` is created when falsy.
            known_python_methods: Method names stored on the handle. When
                omitted, each handle gets its own empty list.
            _router: Existing router to reuse. When falsy, a new one is
                built with ``self._make_router()``.
            _internal_use_serve_request: Stored as ``_use_serve_request``.
            _internal_pickled_http_request: Stored as
                ``_pickled_http_request``.
        """
        self.controller_handle = controller_handle
        self.endpoint_name = endpoint_name
        self.handle_options = handle_options or HandleOptions()
        # A literal ``[]`` default would be one list object shared by every
        # handle created without this argument; build a fresh list instead.
        self.known_python_methods = ([] if known_python_methods is None else
                                     known_python_methods)
        self.handle_tag = f"{self.endpoint_name}#{get_random_letters()}"
        self._use_serve_request = _internal_use_serve_request
        self._pickled_http_request = _internal_pickled_http_request

        self.request_counter = metrics.Counter(
            "serve_handle_request_counter",
            description=("The number of handle.remote() calls that have been "
                         "made on this handle."),
            tag_keys=("handle", "endpoint"))
        self.request_counter.set_default_tags({
            "handle": self.handle_tag,
            "endpoint": self.endpoint_name
        })

        self.router: EndpointRouter = _router or self._make_router()
Beispiel #7
0
    def __init__(
            self,
            controller_handle: ActorHandle,
            backend_tag: BackendTag,
            event_loop: asyncio.BaseEventLoop = None,
    ):
        """Build the replica set for a backend and subscribe it to updates.

        Args:
            controller_handle(ActorHandle): The controller handle that the
                long-poll client listens to.
            backend_tag(BackendTag): Backend this router serves; also used
                as the default "deployment" metric tag.
            event_loop(asyncio.BaseEventLoop): Loop handed to the replica
                set and to the long-poll client.
        """
        replica_set = ReplicaSet(backend_tag, event_loop)
        self._event_loop = event_loop
        self._replica_set = replica_set

        # -- Metrics Registration -- #
        router_requests = metrics.Counter(
            "serve_num_router_requests",
            tag_keys=("deployment", ),
            description="The number of requests processed by the router.",
        )
        self.num_router_requests = router_requests
        router_requests.set_default_tags({"deployment": backend_tag})

        # Both backend config changes and replica handle changes for this
        # backend are forwarded to the replica set.
        config_key = (LongPollNamespace.BACKEND_CONFIGS, backend_tag)
        replicas_key = (LongPollNamespace.REPLICA_HANDLES, backend_tag)
        update_callbacks = {
            config_key: replica_set.set_max_concurrent_queries,
            replicas_key: replica_set.update_worker_replicas,
        }
        self.long_poll_client = LongPollClient(
            controller_handle,
            update_callbacks,
            call_in_event_loop=event_loop,
        )
Beispiel #8
0
    def __init__(
            self,
            controller_handle: ActorHandle,
            deployment_name: EndpointTag,
            handle_options: Optional[HandleOptions] = None,
            *,
            _router: Optional[Router] = None,
            _internal_pickled_http_request: bool = False,
    ):
        """Create a handle for a deployment and register its request counter.

        Args:
            controller_handle: The controller handle.
            deployment_name: Deployment this handle targets.
            handle_options: Options for this handle; a default
                ``HandleOptions()`` is created when falsy.
            _router: Router to reuse; when falsy a new one is made with
                ``self._make_router()``.
            _internal_pickled_http_request: Stored as
                ``_pickled_http_request``.
        """
        self.controller_handle = controller_handle
        self.deployment_name = deployment_name
        self.handle_options = (handle_options
                               if handle_options else HandleOptions())
        # Deployment name plus random letters from get_random_letters().
        self.handle_tag = f"{self.deployment_name}#{get_random_letters()}"
        self._pickled_http_request = _internal_pickled_http_request

        counter = metrics.Counter(
            "serve_handle_request_counter",
            tag_keys=("handle", "deployment"),
            description=("The number of handle.remote() calls that have been "
                         "made on this handle."),
        )
        self.request_counter = counter
        counter.set_default_tags({
            "handle": self.handle_tag,
            "deployment": self.deployment_name,
        })

        self.router: Router = _router if _router else self._make_router()
Beispiel #9
0
 def __init__(self, controller_name: str):
     """Create the prefix router, its route subscription and a counter.

     Args:
         controller_name: Name of the controller actor to attach to.
     """
     # Register the controller name in the internal replica context so
     # that serve connects to the controller instance this proxy runs in.
     ray.serve.api._set_internal_replica_context(
         None, None, controller_name, None)

     prefix_router = LongestPrefixRouter()
     self.router = prefix_router

     # Route table changes go straight to the router.
     controller = ray.get_actor(controller_name)
     route_callbacks = {
         LongPollNamespace.ROUTE_TABLE: prefix_router.update_routes,
     }
     self.long_poll_client = LongPollClient(
         controller,
         route_callbacks,
         call_in_event_loop=asyncio.get_event_loop(),
     )

     self.request_counter = metrics.Counter(
         "serve_num_http_requests",
         tag_keys=("route", ),
         description="The number of HTTP requests processed.",
     )
Beispiel #10
0
    def __init__(
        self,
        controller_handle: ActorHandle,
        deployment_name: EndpointTag,
        handle_options: Optional[HandleOptions] = None,
        *,
        _router: Optional[Router] = None,
        _internal_pickled_http_request: bool = False,
    ):
        """Create a deployment handle, its counter and optional pusher.

        Args:
            controller_handle: The controller handle; also queried for the
                deployment info and used as the metrics push target.
            deployment_name: Deployment this handle targets.
            handle_options: Options for this handle; a default
                ``HandleOptions()`` is created when falsy.
            _router: Router to reuse; when falsy a new one is made with
                ``self._make_router()``.
            _internal_pickled_http_request: Stored as
                ``_pickled_http_request``.
        """
        self.controller_handle = controller_handle
        self.deployment_name = deployment_name
        self.handle_options = (handle_options
                               if handle_options else HandleOptions())
        self.handle_tag = f"{self.deployment_name}#{get_random_letters()}"
        self._pickled_http_request = _internal_pickled_http_request

        counter = metrics.Counter(
            "serve_handle_request_counter",
            tag_keys=("handle", "deployment"),
            description=("The number of handle.remote() calls that have been "
                         "made on this handle."),
        )
        self.request_counter = counter
        counter.set_default_tags({
            "handle": self.handle_tag,
            "deployment": self.deployment_name,
        })

        self.router: Router = _router if _router else self._make_router()

        # Fetch the serialized deployment route from the controller
        # (blocking on ray.get) and decode the deployment info from it.
        info_ref = self.controller_handle.get_deployment_info.remote(
            self.deployment_name)
        route_proto = DeploymentRoute.FromString(ray.get(info_ref))
        info = DeploymentInfo.from_proto(route_proto.deployment_info)

        self._stop_event: Optional[threading.Event] = None
        self._pusher: Optional[threading.Thread] = None
        push_to_controller = self.controller_handle.record_handle_metrics.remote

        # The metrics pusher is only started for autoscaling deployments.
        if not info.deployment_config.autoscaling_config:
            return

        stop_event = threading.Event()
        self._stop_event = stop_event
        self._pusher = start_metrics_pusher(
            interval_s=HANDLE_METRIC_PUSH_INTERVAL_S,
            collection_callback=self._collect_handle_queue_metrics,
            metrics_process_func=push_to_controller,
            stop_event=stop_event,
        )
Beispiel #11
0
    def __init__(self, controller_handle: ActorHandle):
        """Start with empty routing state and register the request counter.

        Args:
            controller_handle(ActorHandle): The controller handle.
        """
        self.controller = controller_handle

        # All three tables start out empty.
        self.endpoint_policies: Dict[str, EndpointPolicy] = {}
        self.backend_replicas: Dict[str, ReplicaSet] = {}
        self._pending_endpoints: Dict[str, asyncio.Future] = {}

        # -- Metrics Registration -- #
        self.num_router_requests = metrics.Counter(
            "serve_num_router_requests",
            tag_keys=("endpoint", ),
            description="The number of requests processed by the router.",
        )
Beispiel #12
0
    def __init__(
            self,
            router,  # ThreadProxiedRouter
            endpoint_name,
            handle_options: Optional[HandleOptions] = None):
        """Wrap a router for one endpoint and register the request counter.

        Args:
            router: Router used by this handle.
            endpoint_name: Endpoint this handle targets.
            handle_options: Options for this handle; a default
                ``HandleOptions()`` is created when falsy.
        """
        self.router = router
        self.endpoint_name = endpoint_name
        self.handle_options = (handle_options
                               if handle_options else HandleOptions())
        # Endpoint name plus random letters from get_random_letters().
        self.handle_tag = f"{self.endpoint_name}#{get_random_letters()}"

        counter = metrics.Counter(
            "serve_handle_request_counter",
            tag_keys=("handle", "endpoint"),
            description=("The number of handle.remote() calls that have been "
                         "made on this handle."),
        )
        self.request_counter = counter
        counter.set_default_tags({
            "handle": self.handle_tag,
            "endpoint": self.endpoint_name,
        })
Beispiel #13
0
    def __init__(self, controller_name: str):
        """Create the Starlette router, route subscription and a counter.

        Args:
            controller_name: Name of the controller actor to attach to.
        """
        # Register the controller name in the internal replica context so
        # that serve.connect() connects to the controller instance this
        # proxy runs in.
        ray.serve.api._set_internal_replica_context(
            None, None, controller_name, None)

        controller_actor = ray.get_actor(controller_name)

        # Requests that match no route are handled by self._not_found.
        self.router = starlette.routing.Router(default=self._not_found)

        # Maps route -> (endpoint_tag, methods); refreshed via long polling.
        self.route_table: Dict[str, Tuple[EndpointTag, List[str]]] = {}

        route_callbacks = {
            LongPollNamespace.ROUTE_TABLE: self._update_route_table,
        }
        self.long_poll_client = LongPollClient(controller_actor,
                                               route_callbacks)

        self.request_counter = metrics.Counter(
            "serve_num_http_requests",
            tag_keys=("route", ),
            description="The number of HTTP requests processed.",
        )
Beispiel #14
0
    def __init__(
        self,
        _callable: Callable,
        deployment_name: str,
        replica_tag: ReplicaTag,
        deployment_config: DeploymentConfig,
        user_config: Any,
        version: DeploymentVersion,
        is_function: bool,
        controller_handle: ActorHandle,
    ) -> None:
        """Store replica state, register metrics and tag "ray" logger output.

        Args:
            _callable: The user callable, stored as ``self.callable``. Its
                ``HEALTH_CHECK_METHOD`` attribute, if callable, is used as
                the health check.
            deployment_name: Deployment name; used as a metric tag and in
                the log format suffix.
            replica_tag: Replica tag; used as a metric tag and in the log
                format suffix.
            deployment_config: Deployment configuration; its
                ``graceful_shutdown_wait_loop_s`` and ``autoscaling_config``
                are read here.
            user_config: Stored as ``self.user_config``.
            version: Stored as ``self.version``.
            is_function: Stored as ``self.is_function``.
            controller_handle: Controller handle; its
                ``record_autoscaling_metrics`` method receives the pushed
                metrics when autoscaling is configured.
        """
        self.deployment_config = deployment_config
        self.deployment_name = deployment_name
        self.replica_tag = replica_tag
        self.callable = _callable
        self.is_function = is_function
        self.user_config = user_config
        self.version = version
        self.rwlock = aiorwlock.RWLock()

        # Use the user's health check when one is defined and callable;
        # otherwise fall back to a no-op.
        user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
        if not callable(user_health_check):

            def user_health_check():
                pass

        self.user_health_check = sync_to_async(user_health_check)

        self.num_ongoing_requests = 0

        # Every metric below carries the same deployment/replica tags; each
        # one gets its own copy of the dict.
        replica_tags = {
            "deployment": self.deployment_name,
            "replica": self.replica_tag,
        }

        self.request_counter = metrics.Counter(
            "serve_deployment_request_counter",
            description=("The number of queries that have been "
                         "processed in this replica."),
            tag_keys=("deployment", "replica"),
        )
        self.request_counter.set_default_tags(dict(replica_tags))

        self.error_counter = metrics.Counter(
            "serve_deployment_error_counter",
            description=("The number of exceptions that have "
                         "occurred in this replica."),
            tag_keys=("deployment", "replica"),
        )
        self.error_counter.set_default_tags(dict(replica_tags))

        self.restart_counter = metrics.Counter(
            "serve_deployment_replica_starts",
            description=("The number of times this replica "
                         "has been restarted due to failure."),
            tag_keys=("deployment", "replica"),
        )
        self.restart_counter.set_default_tags(dict(replica_tags))

        self.processing_latency_tracker = metrics.Histogram(
            "serve_deployment_processing_latency_ms",
            description="The latency for queries to be processed.",
            boundaries=DEFAULT_LATENCY_BUCKET_MS,
            tag_keys=("deployment", "replica"),
        )
        self.processing_latency_tracker.set_default_tags(dict(replica_tags))

        self.num_processing_items = metrics.Gauge(
            "serve_replica_processing_queries",
            description="The current number of queries being processed.",
            tag_keys=("deployment", "replica"),
        )
        self.num_processing_items.set_default_tags(dict(replica_tags))

        # Each construction of the replica counts as one start.
        self.restart_counter.inc()

        self._shutdown_wait_loop_s = (
            deployment_config.graceful_shutdown_wait_loop_s)

        # Autoscaling metrics are only pushed when autoscaling is configured.
        if deployment_config.autoscaling_config:
            process_remote_func = (
                controller_handle.record_autoscaling_metrics.remote)
            config = deployment_config.autoscaling_config
            start_metrics_pusher(
                interval_s=config.metrics_interval_s,
                collection_callback=self._collect_autoscaling_metrics,
                metrics_process_func=process_remote_func,
            )

        # NOTE(edoakes): we used to recommend that users use the "ray" logger
        # and tagged the logs with metadata as below. We now recommend using
        # the "ray.serve" 'component logger' (as of Ray 1.13). This is left to
        # maintain backwards compatibility with users who were using the
        # existing logger. We can consider removing it in Ray 2.0.
        ray_logger = logging.getLogger("ray")
        for handler in ray_logger.handlers:
            # A handler may have no formatter (logging.Handler.formatter
            # defaults to None). Fall back to the logging module's default
            # format string instead of raising AttributeError.
            base_format = (getattr(handler.formatter, "_fmt", None)
                           or "%(message)s")
            handler.setFormatter(
                logging.Formatter(
                    base_format +
                    f" component=serve deployment={self.deployment_name} "
                    f"replica={self.replica_tag}"))
Beispiel #15
0
    def __init__(self, _callable: Callable, backend_config: BackendConfig,
                 is_function: bool, controller_handle: ActorHandle) -> None:
        """Set up the batch queue, metrics and logging, then start main_loop.

        Args:
            _callable: The user callable, stored as ``self.callable``.
            backend_config: Backend configuration; its ``max_batch_size``,
                ``batch_wait_timeout`` and ``user_config`` are read here.
            is_function: Stored as ``self.is_function``.
            controller_handle: Controller handle the long-poll client
                listens to for backend config updates.
        """
        self.backend_tag = ray.serve.api.get_replica_context().backend_tag
        self.replica_tag = ray.serve.api.get_replica_context().replica_tag
        self.callable = _callable
        self.is_function = is_function

        self.config = backend_config
        # A falsy max_batch_size is treated as a batch size of 1.
        self.batch_queue = _BatchQueue(self.config.max_batch_size or 1,
                                       self.config.batch_wait_timeout)
        self.reconfigure(self.config.user_config)

        self.num_ongoing_requests = 0

        self.request_counter = metrics.Counter(
            "serve_backend_request_counter",
            description=("The number of queries that have been "
                         "processed in this replica."),
            tag_keys=("backend", ))
        self.request_counter.set_default_tags({"backend": self.backend_tag})

        self.long_poll_client = LongPollAsyncClient(controller_handle, {
            LongPollKey.BACKEND_CONFIGS: self._update_backend_configs,
        })

        self.error_counter = metrics.Counter(
            "serve_backend_error_counter",
            description=("The number of exceptions that have "
                         "occurred in the backend."),
            tag_keys=("backend", ))
        self.error_counter.set_default_tags({"backend": self.backend_tag})

        # The remaining metrics share the same backend/replica tags; each
        # one gets its own copy of the dict.
        replica_tags = {
            "backend": self.backend_tag,
            "replica": self.replica_tag,
        }

        self.restart_counter = metrics.Counter(
            "serve_backend_replica_starts",
            description=("The number of times this replica "
                         "has been restarted due to failure."),
            tag_keys=("backend", "replica"))
        self.restart_counter.set_default_tags(dict(replica_tags))

        self.queuing_latency_tracker = metrics.Histogram(
            "serve_backend_queuing_latency_ms",
            description=("The latency for queries in the replica's queue "
                         "waiting to be processed or batched."),
            boundaries=DEFAULT_LATENCY_BUCKET_MS,
            tag_keys=("backend", "replica"))
        self.queuing_latency_tracker.set_default_tags(dict(replica_tags))

        # "batch_size" is a tag key without a default value here.
        self.processing_latency_tracker = metrics.Histogram(
            "serve_backend_processing_latency_ms",
            description="The latency for queries to be processed.",
            boundaries=DEFAULT_LATENCY_BUCKET_MS,
            tag_keys=("backend", "replica", "batch_size"))
        self.processing_latency_tracker.set_default_tags(dict(replica_tags))

        self.num_queued_items = metrics.Gauge(
            "serve_replica_queued_queries",
            description=("The current number of queries queued in "
                         "the backend replicas."),
            tag_keys=("backend", "replica"))
        self.num_queued_items.set_default_tags(dict(replica_tags))

        self.num_processing_items = metrics.Gauge(
            "serve_replica_processing_queries",
            description="The current number of queries being processed.",
            tag_keys=("backend", "replica"))
        self.num_processing_items.set_default_tags(dict(replica_tags))

        # Each construction of the replica counts as one start.
        self.restart_counter.inc()

        # Append backend/replica metadata to the format of every handler on
        # the "ray" logger.
        ray_logger = logging.getLogger("ray")
        for handler in ray_logger.handlers:
            # A handler may have no formatter (logging.Handler.formatter
            # defaults to None). Fall back to the logging module's default
            # format string instead of raising AttributeError.
            base_format = (getattr(handler.formatter, "_fmt", None)
                           or "%(message)s")
            handler.setFormatter(
                logging.Formatter(
                    base_format +
                    f" component=serve backend={self.backend_tag} "
                    f"replica={self.replica_tag}"))

        # Schedule the replica's main loop on the current event loop.
        asyncio.get_event_loop().create_task(self.main_loop())
Beispiel #16
0
    def __init__(
        self,
        _callable: Callable,
        deployment_name: str,
        replica_tag: ReplicaTag,
        deployment_config: DeploymentConfig,
        user_config: Any,
        version: DeploymentVersion,
        is_function: bool,
        controller_handle: ActorHandle,
    ) -> None:
        """Store replica state, register metrics and tag "ray" logger output.

        Args:
            _callable: The user callable, stored as ``self.callable``. Its
                ``HEALTH_CHECK_METHOD`` attribute, if callable, is used as
                the health check.
            deployment_name: Deployment name; used as a metric tag and in
                the log format suffix.
            replica_tag: Replica tag; used as a metric tag and in the log
                format suffix.
            deployment_config: Deployment configuration; its
                ``graceful_shutdown_wait_loop_s`` and ``autoscaling_config``
                are read here.
            user_config: Stored as ``self.user_config``.
            version: Stored as ``self.version``.
            is_function: Stored as ``self.is_function``.
            controller_handle: Controller handle passed to the metrics
                pusher when autoscaling is configured.
        """
        self.deployment_config = deployment_config
        self.deployment_name = deployment_name
        self.replica_tag = replica_tag
        self.callable = _callable
        self.is_function = is_function
        self.user_config = user_config
        self.version = version
        self.rwlock = aiorwlock.RWLock()

        # Use the user's health check when one is defined and callable;
        # otherwise fall back to a no-op.
        user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
        if not callable(user_health_check):

            def user_health_check():
                pass

        self.user_health_check = sync_to_async(user_health_check)

        self.num_ongoing_requests = 0

        # Every metric below carries the same deployment/replica tags; each
        # one gets its own copy of the dict.
        replica_tags = {
            "deployment": self.deployment_name,
            "replica": self.replica_tag,
        }

        self.request_counter = metrics.Counter(
            "serve_deployment_request_counter",
            description=("The number of queries that have been "
                         "processed in this replica."),
            tag_keys=("deployment", "replica"),
        )
        self.request_counter.set_default_tags(dict(replica_tags))

        self.error_counter = metrics.Counter(
            "serve_deployment_error_counter",
            description=("The number of exceptions that have "
                         "occurred in this replica."),
            tag_keys=("deployment", "replica"),
        )
        self.error_counter.set_default_tags(dict(replica_tags))

        self.restart_counter = metrics.Counter(
            "serve_deployment_replica_starts",
            description=("The number of times this replica "
                         "has been restarted due to failure."),
            tag_keys=("deployment", "replica"),
        )
        self.restart_counter.set_default_tags(dict(replica_tags))

        self.processing_latency_tracker = metrics.Histogram(
            "serve_deployment_processing_latency_ms",
            description="The latency for queries to be processed.",
            boundaries=DEFAULT_LATENCY_BUCKET_MS,
            tag_keys=("deployment", "replica"),
        )
        self.processing_latency_tracker.set_default_tags(dict(replica_tags))

        self.num_processing_items = metrics.Gauge(
            "serve_replica_processing_queries",
            description="The current number of queries being processed.",
            tag_keys=("deployment", "replica"),
        )
        self.num_processing_items.set_default_tags(dict(replica_tags))

        # Each construction of the replica counts as one start.
        self.restart_counter.inc()

        self._shutdown_wait_loop_s = (
            deployment_config.graceful_shutdown_wait_loop_s)

        # Autoscaling metrics are only pushed when autoscaling is configured.
        if deployment_config.autoscaling_config:
            config = deployment_config.autoscaling_config
            start_metrics_pusher(
                interval_s=config.metrics_interval_s,
                collection_callback=self._collect_autoscaling_metrics,
                controller_handle=controller_handle,
            )

        # Append deployment/replica metadata to the format of every handler
        # on the "ray" logger.
        ray_logger = logging.getLogger("ray")
        for handler in ray_logger.handlers:
            # A handler may have no formatter (logging.Handler.formatter
            # defaults to None). Fall back to the logging module's default
            # format string instead of raising AttributeError.
            base_format = (getattr(handler.formatter, "_fmt", None)
                           or "%(message)s")
            handler.setFormatter(
                logging.Formatter(
                    base_format +
                    f" component=serve deployment={self.deployment_name} "
                    f"replica={self.replica_tag}"))
Beispiel #17
0
    def __init__(self, _callable: Callable, backend_config: BackendConfig,
                 is_function: bool, controller_handle: ActorHandle) -> None:
        """Store replica state, subscribe to config updates, register metrics.

        Args:
            _callable: The user callable, stored as ``self.callable``.
            backend_config: Backend configuration, stored as ``self.config``.
            is_function: Stored as ``self.is_function``.
            controller_handle: Controller handle the long-poll client
                listens to for this backend's config updates.
        """
        # The backend tag is read from the replica context's ``deployment``.
        self.backend_tag = ray.serve.api.get_replica_context().deployment
        self.replica_tag = ray.serve.api.get_replica_context().replica_tag
        self.callable = _callable
        self.is_function = is_function

        self.config = backend_config

        self.num_ongoing_requests = 0

        # Every metric below carries the same deployment/replica tags; each
        # one gets its own copy of the dict.
        replica_tags = {
            "deployment": self.backend_tag,
            "replica": self.replica_tag,
        }

        self.request_counter = metrics.Counter(
            "serve_deployment_request_counter",
            description=("The number of queries that have been "
                         "processed in this replica."),
            tag_keys=("deployment", "replica"))
        self.request_counter.set_default_tags(dict(replica_tags))

        # Config updates for this backend are delivered on the current
        # event loop.
        self.loop = asyncio.get_event_loop()
        self.long_poll_client = LongPollClient(
            controller_handle,
            {
                (LongPollNamespace.BACKEND_CONFIGS, self.backend_tag):
                self._update_backend_configs,
            },
            call_in_event_loop=self.loop,
        )

        self.error_counter = metrics.Counter(
            "serve_deployment_error_counter",
            description=("The number of exceptions that have "
                         "occurred in this replica."),
            tag_keys=("deployment", "replica"))
        self.error_counter.set_default_tags(dict(replica_tags))

        self.restart_counter = metrics.Counter(
            "serve_deployment_replica_starts",
            description=("The number of times this replica "
                         "has been restarted due to failure."),
            tag_keys=("deployment", "replica"))
        self.restart_counter.set_default_tags(dict(replica_tags))

        self.processing_latency_tracker = metrics.Histogram(
            "serve_deployment_processing_latency_ms",
            description="The latency for queries to be processed.",
            boundaries=DEFAULT_LATENCY_BUCKET_MS,
            tag_keys=("deployment", "replica"))
        self.processing_latency_tracker.set_default_tags(dict(replica_tags))

        self.num_processing_items = metrics.Gauge(
            "serve_replica_processing_queries",
            description="The current number of queries being processed.",
            tag_keys=("deployment", "replica"))
        self.num_processing_items.set_default_tags(dict(replica_tags))

        # Each construction of the replica counts as one start.
        self.restart_counter.inc()

        # Append deployment/replica metadata to the format of every handler
        # on the "ray" logger.
        ray_logger = logging.getLogger("ray")
        for handler in ray_logger.handlers:
            # A handler may have no formatter (logging.Handler.formatter
            # defaults to None). Fall back to the logging module's default
            # format string instead of raising AttributeError.
            base_format = (getattr(handler.formatter, "_fmt", None)
                           or "%(message)s")
            handler.setFormatter(
                logging.Formatter(
                    base_format +
                    f" component=serve deployment={self.backend_tag} "
                    f"replica={self.replica_tag}"))