def __init__(
    self,
    host: str,
    port: int,
    root_path: str,
    controller_name: str,
    node_id: str,
    http_middlewares: Optional[
        List["starlette.middleware.Middleware"]  # noqa: F821
    ] = None,
):
    """Build the proxy app, wrap it in middlewares and schedule the server.

    Args:
        host: Stored on the instance as ``self.host``.
        port: Stored on the instance as ``self.port``.
        root_path: Stored on the instance as ``self.root_path``.
        controller_name: Passed to ``HTTPProxy`` when building the app.
        node_id: Converted with ``node_id_to_ip_addr`` and used as the
            component id of this actor's logger.
        http_middlewares: Optional middleware descriptors. Each entry must
            expose ``cls`` and ``options``; ``None`` means no middleware.
    """
    configure_component_logger(
        component_name="http_proxy",
        component_id=node_id_to_ip_addr(node_id),
    )

    middlewares = [] if http_middlewares is None else http_middlewares

    self.host, self.port, self.root_path = host, port, root_path
    self.setup_complete = asyncio.Event()

    self.app = HTTPProxy(controller_name)

    # Each middleware wraps the result of the previous one, so the last
    # entry in the list ends up as the outermost layer.
    self.wrapped_app = self.app
    for entry in middlewares:
        self.wrapped_app = entry.cls(self.wrapped_app, **entry.options)

    # Start running the HTTP server on the event loop.
    # This task should be running forever. We track it in case of failure.
    event_loop = asyncio.get_event_loop()
    self.running_task = event_loop.create_task(self.run())
async def __init__(
    self,
    controller_name: str,
    http_config: HTTPOptions,
    checkpoint_path: str,
    detached: bool = False,
):
    """Set up controller state, start the control loop and recover config.

    Args:
        controller_name: Name of this controller; also used as a prefix of
            the KV-store namespace.
        http_config: Forwarded to ``HTTPState``.
        checkpoint_path: Passed to ``make_kv_store`` to build the
            checkpoint store.
        detached: Forwarded to ``HTTPState`` and
            ``DeploymentStateManager``.
    """
    configure_component_logger(
        component_name="controller",
        component_id=str(os.getpid()),
    )

    # Used to read/write checkpoints.
    self.ray_worker_namespace = ray.get_runtime_context().namespace
    self.controller_name = controller_name
    self.checkpoint_path = checkpoint_path

    store_namespace = f"{self.controller_name}-{self.ray_worker_namespace}"
    self.kv_store = make_kv_store(checkpoint_path, namespace=store_namespace)
    self.snapshot_store = RayInternalKVStore(namespace=store_namespace)

    # Dictionary of deployment_name -> proxy_name -> queue length.
    self.deployment_stats = defaultdict(lambda: defaultdict(dict))

    # Used to ensure that only a single state-changing operation happens
    # at any given time.
    self.write_lock = asyncio.Lock()

    self.long_poll_host = LongPollHost()

    # NOTE: HTTPState is built before the named actors are listed below;
    # keep this order, since the listing is a snapshot taken afterwards.
    self.http_state = HTTPState(controller_name, detached, http_config)
    self.endpoint_state = EndpointState(self.kv_store, self.long_poll_host)

    # Fetch all running actors in current cluster as source of current
    # replica state for controller failure recovery
    serve_actor_names = []
    for actor_info in ray.util.list_named_actors(all_namespaces=True):
        if actor_info["namespace"] == SERVE_NAMESPACE:
            serve_actor_names.append(actor_info["name"])

    self.deployment_state_manager = DeploymentStateManager(
        controller_name,
        detached,
        self.kv_store,
        self.long_poll_host,
        serve_actor_names,
    )

    # Reference to Ray task executing most recent deployment request
    self.config_deployment_request_ref: ObjectRef = None

    # Unix timestamp of latest config deployment request. Defaults to 0.
    self.deployment_timestamp = 0

    # The control loop is only scheduled here; the recovery call below
    # executes before this coroutine next yields to the event loop.
    event_loop = asyncio.get_event_loop()
    event_loop.create_task(self.run_control_loop())

    self._recover_config_from_checkpoint()
async def __init__(
    self,
    controller_name: str,
    http_config: HTTPOptions,
    checkpoint_path: str,
    detached: bool = False,
    _override_controller_namespace: Optional[str] = None,
):
    """Set up controller state and schedule the control loop.

    Args:
        controller_name: Name of this controller; also used as a prefix of
            the KV-store namespace.
        http_config: Forwarded to ``HTTPState``.
        checkpoint_path: Passed to ``make_kv_store`` to build the
            checkpoint store.
        detached: Forwarded to ``HTTPState`` and
            ``DeploymentStateManager``.
        _override_controller_namespace: Forwarded unchanged to
            ``HTTPState`` and ``DeploymentStateManager``. It is not used
            for ``self.controller_namespace``, which always comes from
            the Ray runtime context.
    """
    configure_component_logger(
        component_name="controller",
        component_id=str(os.getpid()),
    )

    # Used to read/write checkpoints.
    self.controller_namespace = ray.get_runtime_context().namespace
    self.controller_name = controller_name
    self.checkpoint_path = checkpoint_path

    store_namespace = f"{self.controller_name}-{self.controller_namespace}"
    self.kv_store = make_kv_store(checkpoint_path, namespace=store_namespace)
    self.snapshot_store = RayInternalKVStore(namespace=store_namespace)

    # Dictionary of deployment_name -> proxy_name -> queue length.
    self.deployment_stats = defaultdict(lambda: defaultdict(dict))

    # Used to ensure that only a single state-changing operation happens
    # at any given time.
    self.write_lock = asyncio.Lock()

    self.long_poll_host = LongPollHost()

    # NOTE: HTTPState is built before the named actors are listed below;
    # keep this order, since the listing is a snapshot taken afterwards.
    self.http_state = HTTPState(
        controller_name,
        detached,
        http_config,
        _override_controller_namespace=_override_controller_namespace,
    )
    self.endpoint_state = EndpointState(self.kv_store, self.long_poll_host)

    # Fetch all running actors in current cluster as source of current
    # replica state for controller failure recovery
    named_actors = ray.util.list_named_actors()
    self.deployment_state_manager = DeploymentStateManager(
        controller_name,
        detached,
        self.kv_store,
        self.long_poll_host,
        named_actors,
        _override_controller_namespace=_override_controller_namespace,
    )

    # Reference to Ray task executing most recent deployment request
    self.config_deployment_request_ref: ObjectRef = None

    # Unix timestamp of latest config deployment request. Defaults to 0.
    self.deployment_timestamp = 0

    # TODO(simon): move autoscaling related stuff into a manager.
    self.autoscaling_metrics_store = InMemoryMetricsStore()

    event_loop = asyncio.get_event_loop()
    event_loop.create_task(self.run_control_loop())
async def __init__(
    self,
    deployment_name,
    replica_tag,
    serialized_deployment_def: bytes,
    serialized_init_args: bytes,
    serialized_init_kwargs: bytes,
    deployment_config_proto_bytes: bytes,
    version: DeploymentVersion,
    controller_name: str,
    detached: bool,
):
    """Prepare the replica actor without running user initialization yet.

    Deserializes the deployment definition, the init args/kwargs and the
    deployment config, resolves an import-path definition to the object it
    names, and stores a closure in ``self._initialize_replica`` that
    builds the user callable and the ``RayServeReplica`` when awaited.
    ``self.replica`` is ``None`` until that closure has run.

    Args:
        deployment_name: Name of the deployment; used for logging and for
            the internal replica context.
        replica_tag: Identifier of this replica; used for logging and for
            the internal replica context.
        serialized_deployment_def: cloudpickle payload holding either the
            deployment function/class or an import-path string.
        serialized_init_args: cloudpickle payload with the positional
            arguments for the user class ``__init__``.
        serialized_init_kwargs: cloudpickle payload with the keyword
            arguments for the user class ``__init__``.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``.
        version: Forwarded to ``RayServeReplica``.
        controller_name: Name of the controller actor to look up in
            ``SERVE_NAMESPACE``; must be truthy.
        detached: Accepted but not referenced in this constructor.

    Raises:
        AssertionError: If the resolved deployment definition is neither a
            function nor a class, or if ``controller_name`` is falsy.
    """
    configure_component_logger(
        component_type="deployment",
        component_name=deployment_name,
        component_id=replica_tag,
    )

    # NOTE(review): cloudpickle.loads can execute arbitrary code. These
    # payloads are presumably produced by Serve itself -- confirm they can
    # never originate from untrusted input.
    deployment_def = cloudpickle.loads(serialized_deployment_def)

    if isinstance(deployment_def, str):
        import_path = deployment_def
        module_name, attr_name = parse_import_path(import_path)
        deployment_def = getattr(import_module(module_name), attr_name)
        # For ray or serve decorated class or function, strip to return
        # original body
        if isinstance(deployment_def, RemoteFunction):
            deployment_def = deployment_def._function
        elif isinstance(deployment_def, ActorClass):
            deployment_def = deployment_def.__ray_metadata__.modified_class
        elif isinstance(deployment_def, Deployment):
            logger.warning(
                f'The import path "{import_path}" contains a '
                "decorated Serve deployment. The decorator's settings "
                "are ignored when deploying via import path.")
            deployment_def = deployment_def.func_or_class

    init_args = cloudpickle.loads(serialized_init_args)
    init_kwargs = cloudpickle.loads(serialized_init_kwargs)

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    if inspect.isfunction(deployment_def):
        is_function = True
    elif inspect.isclass(deployment_def):
        is_function = False
    else:
        # Raise explicitly instead of `assert False`: asserts are removed
        # under `python -O`, which would leave `is_function` unbound and
        # defer the failure to an unrelated NameError inside the closure.
        raise AssertionError(
            "deployment_def must be function, class, or "
            "corresponding import path. Instead, it's type was "
            f"{type(deployment_def)}.")

    # Set the controller name so that serve.connect() in the user's
    # code will connect to the instance that this deployment is running
    # in.
    ray.serve.context.set_internal_replica_context(
        deployment_name,
        replica_tag,
        controller_name,
        servable_object=None,
    )

    # Explicit raise (same exception type as the former assert) so the
    # check also runs when Python is started with -O.
    if not controller_name:
        raise AssertionError("Must provide a valid controller_name")
    controller_handle = ray.get_actor(controller_name,
                                      namespace=SERVE_NAMESPACE)

    # This closure initializes user code and finalizes replica
    # startup. By splitting the initialization step like this,
    # we can already access this actor before the user code
    # has finished initializing.
    # The supervising state manager can then wait
    # for allocation of this replica by using the `is_allocated`
    # method. After that, it calls `reconfigure` to trigger
    # user code initialization.
    async def initialize_replica():
        if is_function:
            _callable = deployment_def
        else:
            # This allows deployments to define an async __init__
            # method (required for FastAPI).
            _callable = deployment_def.__new__(deployment_def)
            await sync_to_async(_callable.__init__)(*init_args,
                                                    **init_kwargs)
        # Setting the context again to update the servable_object.
        ray.serve.context.set_internal_replica_context(
            deployment_name,
            replica_tag,
            controller_name,
            servable_object=_callable,
        )

        self.replica = RayServeReplica(
            _callable,
            deployment_name,
            replica_tag,
            deployment_config,
            deployment_config.user_config,
            version,
            is_function,
            controller_handle,
        )

    # Is it fine that replica is None here?
    # Should we add a check in all methods that use self.replica
    # or, alternatively, create an async get_replica() method?
    self.replica = None
    self._initialize_replica = initialize_replica