Example #1
0
    def __init__(
        self,
        host: str,
        port: int,
        root_path: str,
        controller_name: str,
        node_id: str,
        http_middlewares: Optional[
            List["starlette.middleware.Middleware"]] = None,
    ):  # noqa: F821
        configure_component_logger(component_name="http_proxy",
                                   component_id=node_id_to_ip_addr(node_id))

        if http_middlewares is None:
            http_middlewares = []

        self.host = host
        self.port = port
        self.root_path = root_path

        self.setup_complete = asyncio.Event()

        self.app = HTTPProxy(controller_name)

        self.wrapped_app = self.app
        for middleware in http_middlewares:
            self.wrapped_app = middleware.cls(self.wrapped_app,
                                              **middleware.options)

        # Start running the HTTP server on the event loop.
        # This task should be running forever. We track it in case of failure.
        self.running_task = asyncio.get_event_loop().create_task(self.run())
Example #2
0
    async def __init__(
        self,
        controller_name: str,
        http_config: HTTPOptions,
        checkpoint_path: str,
        detached: bool = False,
    ):
        configure_component_logger(component_name="controller",
                                   component_id=str(os.getpid()))

        # Used to read/write checkpoints.
        self.ray_worker_namespace = ray.get_runtime_context().namespace
        self.controller_name = controller_name
        self.checkpoint_path = checkpoint_path
        kv_store_namespace = f"{self.controller_name}-{self.ray_worker_namespace}"
        self.kv_store = make_kv_store(checkpoint_path,
                                      namespace=kv_store_namespace)
        self.snapshot_store = RayInternalKVStore(namespace=kv_store_namespace)

        # Dictionary of deployment_name -> proxy_name -> queue length.
        self.deployment_stats = defaultdict(lambda: defaultdict(dict))

        # Used to ensure that only a single state-changing operation happens
        # at any given time.
        self.write_lock = asyncio.Lock()

        self.long_poll_host = LongPollHost()

        self.http_state = HTTPState(
            controller_name,
            detached,
            http_config,
        )
        self.endpoint_state = EndpointState(self.kv_store, self.long_poll_host)

        # Fetch all running actors in current cluster as source of current
        # replica state for controller failure recovery
        all_current_actors = ray.util.list_named_actors(all_namespaces=True)
        all_serve_actor_names = [
            actor["name"] for actor in all_current_actors
            if actor["namespace"] == SERVE_NAMESPACE
        ]

        self.deployment_state_manager = DeploymentStateManager(
            controller_name,
            detached,
            self.kv_store,
            self.long_poll_host,
            all_serve_actor_names,
        )

        # Reference to Ray task executing most recent deployment request
        self.config_deployment_request_ref: ObjectRef = None

        # Unix timestamp of latest config deployment request. Defaults to 0.
        self.deployment_timestamp = 0

        asyncio.get_event_loop().create_task(self.run_control_loop())

        self._recover_config_from_checkpoint()
Example #3
0
    async def __init__(
        self,
        controller_name: str,
        http_config: HTTPOptions,
        checkpoint_path: str,
        detached: bool = False,
        _override_controller_namespace: Optional[str] = None,
    ):
        configure_component_logger(component_name="controller",
                                   component_id=str(os.getpid()))

        # Used to read/write checkpoints.
        self.controller_namespace = ray.get_runtime_context().namespace
        self.controller_name = controller_name
        self.checkpoint_path = checkpoint_path
        kv_store_namespace = f"{self.controller_name}-{self.controller_namespace}"
        self.kv_store = make_kv_store(checkpoint_path,
                                      namespace=kv_store_namespace)
        self.snapshot_store = RayInternalKVStore(namespace=kv_store_namespace)

        # Dictionary of deployment_name -> proxy_name -> queue length.
        self.deployment_stats = defaultdict(lambda: defaultdict(dict))

        # Used to ensure that only a single state-changing operation happens
        # at any given time.
        self.write_lock = asyncio.Lock()

        self.long_poll_host = LongPollHost()

        self.http_state = HTTPState(
            controller_name,
            detached,
            http_config,
            _override_controller_namespace=_override_controller_namespace,
        )
        self.endpoint_state = EndpointState(self.kv_store, self.long_poll_host)
        # Fetch all running actors in current cluster as source of current
        # replica state for controller failure recovery
        all_current_actor_names = ray.util.list_named_actors()
        self.deployment_state_manager = DeploymentStateManager(
            controller_name,
            detached,
            self.kv_store,
            self.long_poll_host,
            all_current_actor_names,
            _override_controller_namespace=_override_controller_namespace,
        )

        # Reference to Ray task executing most recent deployment request
        self.config_deployment_request_ref: ObjectRef = None

        # Unix timestamp of latest config deployment request. Defaults to 0.
        self.deployment_timestamp = 0

        # TODO(simon): move autoscaling related stuff into a manager.
        self.autoscaling_metrics_store = InMemoryMetricsStore()

        asyncio.get_event_loop().create_task(self.run_control_loop())
Example #4
0
        async def __init__(
            self,
            deployment_name,
            replica_tag,
            serialized_deployment_def: bytes,
            serialized_init_args: bytes,
            serialized_init_kwargs: bytes,
            deployment_config_proto_bytes: bytes,
            version: DeploymentVersion,
            controller_name: str,
            detached: bool,
        ):
            configure_component_logger(
                component_type="deployment",
                component_name=deployment_name,
                component_id=replica_tag,
            )

            deployment_def = cloudpickle.loads(serialized_deployment_def)

            if isinstance(deployment_def, str):
                import_path = deployment_def
                module_name, attr_name = parse_import_path(import_path)
                deployment_def = getattr(import_module(module_name), attr_name)
                # For ray or serve decorated class or function, strip to return
                # original body
                if isinstance(deployment_def, RemoteFunction):
                    deployment_def = deployment_def._function
                elif isinstance(deployment_def, ActorClass):
                    deployment_def = deployment_def.__ray_metadata__.modified_class
                elif isinstance(deployment_def, Deployment):
                    logger.warning(
                        f'The import path "{import_path}" contains a '
                        "decorated Serve deployment. The decorator's settings "
                        "are ignored when deploying via import path.")
                    deployment_def = deployment_def.func_or_class

            init_args = cloudpickle.loads(serialized_init_args)
            init_kwargs = cloudpickle.loads(serialized_init_kwargs)

            deployment_config = DeploymentConfig.from_proto_bytes(
                deployment_config_proto_bytes)

            if inspect.isfunction(deployment_def):
                is_function = True
            elif inspect.isclass(deployment_def):
                is_function = False
            else:
                assert False, (
                    "deployment_def must be function, class, or "
                    "corresponding import path. Instead, it's type was "
                    f"{type(deployment_def)}.")

            # Set the controller name so that serve.connect() in the user's
            # code will connect to the instance that this deployment is running
            # in.
            ray.serve.context.set_internal_replica_context(
                deployment_name,
                replica_tag,
                controller_name,
                servable_object=None,
            )

            assert controller_name, "Must provide a valid controller_name"

            controller_handle = ray.get_actor(controller_name,
                                              namespace=SERVE_NAMESPACE)

            # This closure initializes user code and finalizes replica
            # startup. By splitting the initialization step like this,
            # we can already access this actor before the user code
            # has finished initializing.
            # The supervising state manager can then wait
            # for allocation of this replica by using the `is_allocated`
            # method. After that, it calls `reconfigure` to trigger
            # user code initialization.
            async def initialize_replica():
                if is_function:
                    _callable = deployment_def
                else:
                    # This allows deployments to define an async __init__
                    # method (required for FastAPI).
                    _callable = deployment_def.__new__(deployment_def)
                    await sync_to_async(_callable.__init__)(*init_args,
                                                            **init_kwargs)

                # Setting the context again to update the servable_object.
                ray.serve.context.set_internal_replica_context(
                    deployment_name,
                    replica_tag,
                    controller_name,
                    servable_object=_callable,
                )

                self.replica = RayServeReplica(
                    _callable,
                    deployment_name,
                    replica_tag,
                    deployment_config,
                    deployment_config.user_config,
                    version,
                    is_function,
                    controller_handle,
                )

            # Is it fine that replica is None here?
            # Should we add a check in all methods that use self.replica
            # or, alternatively, create an async get_replica() method?
            self.replica = None
            self._initialize_replica = initialize_replica