async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig, version: Optional[str],
                 route_prefix: Optional[str]) -> Optional[GoalId]:
    """Create or update the named deployment and its HTTP endpoint.

    Registers the backend under the write lock, then points an endpoint
    (all HTTP methods) at it with a 100% traffic policy. Returns the
    GoalId tracking the deployment, if one was created.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    # Expose the user class's method names on the endpoint; a plain
    # function deployment has none.
    backend_def = replica_config.backend_def
    if inspect.isclass(backend_def):
        python_methods = [
            method_name
            for method_name, _ in inspect.getmembers(backend_def,
                                                     inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods)
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            TrafficPolicy({name: 1.0}))
        return goal_id
def create_backend(self, backend_tag: BackendTag,
                   backend_config: BackendConfig,
                   replica_config: ReplicaConfig) -> Optional[GoalId]:
    """Create (or no-op update) the backend identified by `backend_tag`.

    Returns:
        The GoalId for the new backend goal, or the currently pending
        goal (possibly None) when the config is unchanged.
    """
    # Ensures this method is idempotent: an identical re-submission
    # returns without setting a new goal.
    backend_info = self._backend_metadata.get(backend_tag)
    if backend_info is not None:
        if (backend_info.backend_config == backend_config
                and backend_info.replica_config == replica_config):
            return None

    backend_replica_class = create_backend_replica(
        replica_config.func_or_class)

    # Save creator that starts replicas, the arguments to be passed in,
    # and the configuration for the backends.
    backend_info = BackendInfo(worker_class=backend_replica_class,
                               backend_config=backend_config,
                               replica_config=replica_config)

    # Record the new goal; any previously pending goal for this tag is
    # returned so it can be marked complete below.
    new_goal_id, existing_goal_id = self._set_backend_goal(
        backend_tag, backend_info)

    # NOTE(edoakes): we must write a checkpoint before starting new
    # or pushing the updated config to avoid inconsistent state if we
    # crash while making the change.
    self._checkpoint()
    self._notify_backend_configs_changed()

    # The old goal (if any) is superseded by the new one.
    if existing_goal_id is not None:
        self._goal_manager.complete_goal(existing_goal_id)
    return new_goal_id
async def deploy(
        self, name: str, backend_config: BackendConfig,
        replica_config: ReplicaConfig, python_methods: List[str],
        version: Optional[str],
        route_prefix: Optional[str]) -> Tuple[Optional[GoalId], bool]:
    """Create or update the named deployment and its endpoint.

    Returns a (goal_id, updating) pair where `updating` indicates whether
    an existing deployment is being replaced.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    async with self.write_lock:
        # Wrap the replica class as a Ray actor definition.
        replica_cls = create_backend_replica(
            name, replica_config.serialized_backend_def)
        backend_info = BackendInfo(
            actor_def=ray.remote(replica_cls),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id, updating = self.backend_state.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods,
            legacy=False)
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            TrafficPolicy({name: 1.0}))
        return goal_id, updating
async def create_backend(self, backend_tag: BackendTag,
                         backend_config: BackendConfig,
                         replica_config: ReplicaConfig) -> UUID:
    """Register a new backend under the specified tag.

    Returns:
        The UUID of the event that completes once the backend's replicas
        are running (None when the identical backend already exists).
    """
    async with self.write_lock:
        # Ensures this method is idempotent.
        backend_info = self.backend_state.get_backend(backend_tag)
        if backend_info is not None:
            if (backend_info.backend_config == backend_config
                    and backend_info.replica_config == replica_config):
                return

        backend_replica = create_backend_replica(
            replica_config.func_or_class)

        # Save creator that starts replicas, the arguments to be passed in,
        # and the configuration for the backends.
        backend_info = BackendInfo(
            worker_class=backend_replica,
            backend_config=backend_config,
            replica_config=replica_config)
        self.backend_state.add_backend(backend_tag, backend_info)
        metadata = backend_config.internal_metadata
        if metadata.autoscaling_config is not None:
            self.autoscaling_policies[
                backend_tag] = BasicAutoscalingPolicy(
                    backend_tag, metadata.autoscaling_config)

        try:
            # This call should be to run control loop
            self.actor_reconciler._scale_backend_replicas(
                self.backend_state.backends, backend_tag,
                backend_config.num_replicas)
        except RayServeException as e:
            # Roll back the registration if scaling was rejected.
            del self.backend_state.backends[backend_tag]
            raise e

        return_uuid = self._create_event_with_result({
            backend_tag: backend_info
        })
        # NOTE(edoakes): we must write a checkpoint before starting new
        # or pushing the updated config to avoid inconsistent state if we
        # crash while making the change.
        self._checkpoint()
        await self.actor_reconciler._enqueue_pending_scale_changes_loop(
            self.backend_state)
        await self.actor_reconciler.backend_control_loop()

        self.notify_replica_handles_changed()

        # Set the backend config inside routers
        # (particularly for max_concurrent_queries).
        self.notify_backend_configs_changed()
        return return_uuid
async def create_backend(
        self, backend_tag: BackendTag, backend_config: BackendConfig,
        replica_config: ReplicaConfig) -> Optional[GoalId]:
    """Register a new backend under the specified tag.

    Legacy entry point: every backend created this way is stamped with
    the reserved version tag instead of a user-supplied version.
    """
    async with self.write_lock:
        worker_class = create_backend_replica(replica_config.backend_def)
        backend_info = BackendInfo(
            worker_class=worker_class,
            version=RESERVED_VERSION_TAG,
            backend_config=backend_config,
            replica_config=replica_config)
        return self.backend_state.deploy_backend(backend_tag, backend_info)
async def create_backend(self, backend_tag: BackendTag,
                         backend_config: BackendConfig,
                         replica_config: ReplicaConfig) -> None:
    """Register a new backend under the specified tag.

    Starts the configured number of replicas and pushes the backend
    config to every router before returning.
    """
    async with self.write_lock:
        # Ensures this method is idempotent.
        backend_info = self.configuration_store.get_backend(backend_tag)
        if backend_info is not None:
            if (backend_info.backend_config == backend_config
                    and backend_info.replica_config == replica_config):
                return

        backend_replica = create_backend_replica(
            replica_config.func_or_class)

        # Save creator that starts replicas, the arguments to be passed in,
        # and the configuration for the backends.
        self.configuration_store.add_backend(
            backend_tag,
            BackendInfo(
                worker_class=backend_replica,
                backend_config=backend_config,
                replica_config=replica_config))
        metadata = backend_config.internal_metadata
        if metadata.autoscaling_config is not None:
            self.autoscaling_policies[
                backend_tag] = BasicAutoscalingPolicy(
                    backend_tag, metadata.autoscaling_config)

        try:
            self.actor_reconciler._scale_backend_replicas(
                self.configuration_store.backends, backend_tag,
                backend_config.num_replicas)
        except RayServeException as e:
            # Roll back the registration if scaling was rejected.
            del self.configuration_store.backends[backend_tag]
            raise e

        # NOTE(edoakes): we must write a checkpoint before starting new
        # or pushing the updated config to avoid inconsistent state if we
        # crash while making the change.
        self._checkpoint()
        await self.actor_reconciler._start_pending_backend_replicas(
            self.configuration_store)

        # Set the backend config inside the router
        # (particularly for max-batch-size).
        await asyncio.gather(*[
            router.set_backend_config.remote(backend_tag, backend_config)
            for router in self.actor_reconciler.router_handles()
        ])
        await self.broadcast_backend_config(backend_tag)
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 version: Optional[str]) -> Optional[GoalId]:
    """Create or update the named deployment and its HTTP endpoint.

    Defaults the path prefix to "/{name}", rejects wildcard characters in
    user-supplied prefixes, and routes ASGI apps with a wildcard suffix
    so arbitrary sub-paths reach the app.
    """
    if replica_config.path_prefix is None:
        # By default the path prefix is the deployment name; mirror it
        # into the backend config so the backend worker sees it too.
        replica_config.path_prefix = f"/{name}"
        backend_config.internal_metadata.path_prefix = f"/{name}"
    elif ("{" in replica_config.path_prefix
          or "}" in replica_config.path_prefix):
        raise ValueError(
            "Wildcard routes are not supported for deployment paths. "
            "Please use @serve.ingress with FastAPI instead.")

    if replica_config.is_asgi_app:
        # ASGI apps are proxied under their prefix for every HTTP method;
        # the wildcard suffix lets Starlette match arbitrary sub-paths.
        path_prefix = replica_config.path_prefix
        if path_prefix.endswith("/"):
            path_prefix = path_prefix[:-1]
        http_route = path_prefix + WILDCARD_PATH_SUFFIX
        http_methods = ALL_HTTP_METHODS
    else:
        # Generic endpoints support a limited subset of HTTP methods.
        http_route = replica_config.path_prefix
        http_methods = ["GET", "POST"]

    backend_def = replica_config.backend_def
    if inspect.isclass(backend_def):
        python_methods = [
            method_name
            for method_name, _ in inspect.getmembers(backend_def,
                                                     inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)
        self.endpoint_state.create_endpoint(name, http_route, http_methods,
                                            TrafficPolicy({name: 1.0}),
                                            python_methods=python_methods)
        return goal_id
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 version: Optional[str]) -> Optional[GoalId]:
    """Create or update the named deployment and its HTTP endpoint.

    Defaults the path prefix to "/{name}". ASGI apps are routed with a
    "/{wildcard:path}" suffix so arbitrary sub-paths reach the app and
    all HTTP methods are proxied; other deployments get GET/POST only.

    Returns:
        The GoalId tracking the deployment, if any.
    """
    # By default the path prefix is the deployment name.
    if replica_config.path_prefix is None:
        replica_config.path_prefix = f"/{name}"
        # Backend config should be synchronized so the backend worker
        # is aware of it.
        backend_config.internal_metadata.path_prefix = f"/{name}"

    if replica_config.is_asgi_app:
        # When the backend is an ASGI application, proxy it with a
        # prefixed path as well as all HTTP methods. {wildcard:path} is
        # used so HTTPProxy's Starlette router can match arbitrary paths.
        path_prefix = replica_config.path_prefix
        # BUGFIX: strip a trailing "/" from a user-supplied prefix —
        # otherwise the route becomes "/prefix//{wildcard:path}", which
        # Starlette would never match.
        if path_prefix.endswith("/"):
            path_prefix = path_prefix[:-1]
        http_route = f"{path_prefix}" + "/{wildcard:path}"
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
        http_methods = [
            "GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS",
            "TRACE", "PATCH"
        ]
    else:
        http_route = replica_config.path_prefix
        # Generic endpoint should support a limited subset of HTTP methods.
        http_methods = ["GET", "POST"]

    python_methods = []
    if inspect.isclass(replica_config.backend_def):
        for method_name, _ in inspect.getmembers(
                replica_config.backend_def, inspect.isfunction):
            python_methods.append(method_name)

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(
                replica_config.backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)
        self.endpoint_state.create_endpoint(
            name,
            http_route,
            http_methods,
            TrafficPolicy({
                name: 1.0
            }),
            python_methods=python_methods)
        return goal_id
async def create_backend(
        self, backend_tag: BackendTag, backend_config: BackendConfig,
        replica_config: ReplicaConfig) -> Optional[GoalId]:
    """Register a new backend under the specified tag.

    Legacy entry point: the backend is stamped with the reserved version
    tag. Only the goal id is returned; the `updating` flag is discarded.
    """
    async with self.write_lock:
        replica_cls = create_backend_replica(
            backend_tag, replica_config.serialized_backend_def)
        backend_info = BackendInfo(
            actor_def=ray.remote(replica_cls),
            version=RESERVED_VERSION_TAG,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id, _ = self.backend_state.deploy_backend(
            backend_tag, backend_info)
        return goal_id
async def deploy(self,
                 name: str,
                 backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 python_methods: List[str],
                 version: Optional[str],
                 prev_version: Optional[str],
                 route_prefix: Optional[str],
                 deployer_job_id: "Optional[ray._raylet.JobID]" = None
                 ) -> Tuple[Optional[GoalId], bool]:
    """Create or update the named deployment and its endpoint.

    When `prev_version` is given, the deploy is rejected unless it
    matches the currently deployed version (compare-and-swap semantics).
    Returns a (goal_id, updating) pair.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    async with self.write_lock:
        if prev_version is not None:
            existing = self.backend_state.get_backend(name)
            if existing is None or not existing.version:
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing.version != prev_version:
                raise ValueError(f"prev_version '{prev_version}' "
                                 "does not match with the existing "
                                 f"version '{existing.version}'.")

        replica_cls = create_backend_replica(
            name, replica_config.serialized_backend_def)
        backend_info = BackendInfo(
            actor_def=ray.remote(replica_cls),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000))
        goal_id, updating = self.backend_state.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods,
            legacy=False)
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            TrafficPolicy({name: 1.0}))
        return goal_id, updating
def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
) -> Tuple[Optional[GoalId], bool]:
    """Create or update the named deployment and its endpoint.

    The backend config arrives as serialized protobuf bytes. When
    `prev_version` is given the deploy only proceeds if it matches the
    currently deployed version. Returns a (goal_id, updating) pair.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    if prev_version is not None:
        existing = self.backend_state_manager.get_backend(name)
        if existing is None or not existing.version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if existing.version != prev_version:
            raise ValueError(f"prev_version '{prev_version}' "
                             "does not match with the existing "
                             f"version '{existing.version}'.")

    replica_cls = create_backend_replica(
        name, replica_config.serialized_backend_def)
    backend_info = BackendInfo(
        actor_def=ray.remote(replica_cls),
        version=version,
        backend_config=backend_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000))
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # This is probably not the desired behavior for an autoscaling
    # deployment, which redeploys very often to change num_replicas.

    goal_id, updating = self.backend_state_manager.deploy_backend(
        name, backend_info)
    self.endpoint_state.update_endpoint(name,
                                        EndpointInfo(route=route_prefix))
    return goal_id, updating
def deploy_backend(self,
                   backend_tag: BackendTag,
                   backend_config: BackendConfig,
                   replica_config: ReplicaConfig,
                   version: Optional[str] = None) -> Optional[GoalId]:
    """Create or update the backend identified by `backend_tag`.

    Returns:
        The GoalId for the new goal, or the currently pending goal
        (possibly None) when the submission is a no-op.
    """
    # Ensures this method is idempotent.
    backend_info = self._backend_metadata.get(backend_tag)
    if backend_info is not None:
        # Old codepath.
        if version is None:
            if (backend_info.backend_config == backend_config
                    and backend_info.replica_config == replica_config):
                return self._backend_goals.get(backend_tag, None)
        # New codepath: treat version as ground truth for implementation.
        else:
            if (backend_info.backend_config == backend_config
                    and self._target_versions[backend_tag] == version):
                return self._backend_goals.get(backend_tag, None)

    # First deployment of this tag: initialize its replica container.
    if backend_tag not in self._replicas:
        self._replicas[backend_tag] = ReplicaStateContainer()

    backend_replica_class = create_backend_replica(
        replica_config.backend_def)

    # Save creator that starts replicas, the arguments to be passed in,
    # and the configuration for the backends.
    backend_info = BackendInfo(worker_class=backend_replica_class,
                               backend_config=backend_config,
                               replica_config=replica_config)

    # Record the new goal; a previously pending goal (if any) is returned
    # so it can be marked complete below.
    new_goal_id, existing_goal_id = self._set_backend_goal(
        backend_tag, backend_info, version)

    # NOTE(edoakes): we must write a checkpoint before starting new
    # or pushing the updated config to avoid inconsistent state if we
    # crash while making the change.
    self._checkpoint()
    self._notify_backend_configs_changed(backend_tag)

    if existing_goal_id is not None:
        self._goal_manager.complete_goal(existing_goal_id)
    return new_goal_id
async def create_backend(self, backend_tag: BackendTag,
                         backend_config: BackendConfig,
                         replica_config: ReplicaConfig) -> UUID:
    """Register a new backend under the specified tag.

    Returns:
        The UUID of the completion event for this change (None when the
        identical backend already exists).
    """
    async with self.write_lock:
        # Ensures this method is idempotent.
        backend_info = self.backend_state.get_backend(backend_tag)
        if backend_info is not None:
            if (backend_info.backend_config == backend_config
                    and backend_info.replica_config == replica_config):
                return

        backend_replica = create_backend_replica(
            replica_config.func_or_class)

        # Save creator that starts replicas, the arguments to be passed in,
        # and the configuration for the backends.
        backend_info = BackendInfo(
            worker_class=backend_replica,
            backend_config=backend_config,
            replica_config=replica_config)

        return_uuid = self._create_event_with_result({
            backend_tag: backend_info
        })
        await self.set_backend_goal(backend_tag, backend_info, return_uuid)

        try:
            # This call should be to run control loop
            self.backend_state.scale_backend_replicas(
                backend_tag, backend_config.num_replicas)
        except RayServeException as e:
            # Roll back the registration if scaling was rejected.
            del self.backend_state.backends[backend_tag]
            raise e

        # NOTE(edoakes): we must write a checkpoint before starting new
        # or pushing the updated config to avoid inconsistent state if we
        # crash while making the change.
        self._checkpoint()
        self.notify_backend_configs_changed()

        return return_uuid
async def create_backend(
        self,
        backend_tag: BackendTag,
        backend_config: BackendConfig,
        replica_config: ReplicaConfig,
        deployer_job_id: Optional["Optional[ray._raylet.JobID]"] = None
) -> Optional[GoalId]:
    """Register a new backend under the specified tag.

    Legacy entry point: the backend is stamped with the reserved version
    tag and the deploy's `updating` flag is discarded.
    """
    async with self.write_lock:
        replica_cls = create_backend_replica(
            backend_tag, replica_config.serialized_backend_def)
        backend_info = BackendInfo(
            actor_def=ray.remote(replica_cls),
            version=RESERVED_VERSION_TAG,
            backend_config=backend_config,
            replica_config=replica_config,
            start_time_ms=int(time.time() * 1000),
            deployer_job_id=deployer_job_id)
        goal_id, _ = self.backend_state.deploy_backend(
            backend_tag, backend_info)
        return goal_id
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig, version: Optional[str],
                 route_prefix: Optional[str]) -> Optional[GoalId]:
    """Create or update the named deployment and its HTTP endpoint.

    Defaults `route_prefix` to "/{name}". ASGI apps get a wildcard route
    under the prefix and all HTTP methods; other deployments get GET/POST.
    """
    if route_prefix is None:
        route_prefix = f"/{name}"

    if replica_config.is_asgi_app:
        # ASGI apps are proxied under their prefix for every HTTP method;
        # the wildcard suffix lets Starlette match arbitrary sub-paths.
        if route_prefix.endswith("/"):
            route_prefix = route_prefix[:-1]
        http_route = route_prefix + WILDCARD_PATH_SUFFIX
        http_methods = ALL_HTTP_METHODS
    else:
        # Generic endpoints support a limited subset of HTTP methods.
        http_route = route_prefix
        http_methods = ["GET", "POST"]

    backend_def = replica_config.backend_def
    if inspect.isclass(backend_def):
        python_methods = [
            method_name
            for method_name, _ in inspect.getmembers(backend_def,
                                                     inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)
        self.endpoint_state.update_endpoint(name, http_route, http_methods,
                                            TrafficPolicy({name: 1.0}),
                                            python_methods=python_methods)
        return goal_id
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig, version: Optional[str],
                 route_prefix: Optional[str]) -> Optional[GoalId]:
    """Create or update the named deployment and its endpoint.

    ASGI apps are proxied with every HTTP method; other deployments
    accept GET/POST only. Returns the GoalId tracking the deployment.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    # ASGI applications need every method proxied through; a generic
    # endpoint supports a limited subset.
    http_methods = (ALL_HTTP_METHODS
                    if replica_config.is_asgi_app else ["GET", "POST"])

    backend_def = replica_config.backend_def
    if inspect.isclass(backend_def):
        python_methods = [
            method_name
            for method_name, _ in inspect.getmembers(backend_def,
                                                     inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        backend_info = BackendInfo(
            worker_class=create_backend_replica(backend_def),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)

        endpoint_info = EndpointInfo(
            http_methods, route=route_prefix, python_methods=python_methods)
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            TrafficPolicy({name: 1.0}))
        return goal_id
async def __init__(self):
    # NOTE(review): an async __init__ is invalid for ordinary Python
    # classes (instantiation would raise TypeError); presumably this is
    # executed as a Ray async-actor constructor which awaits it — confirm.
    # NOTE(review): `backend_def`, `name`, `init_args`, `backend_config`
    # and `controller_name` are free names; they must be provided by the
    # enclosing scope (e.g. closed over by a factory) — TODO confirm.
    # Allocate the replica instance without running its (async) __init__,
    # then await the initializer explicitly.
    self.worker = object.__new__(create_backend_replica(backend_def))
    await self.worker.__init__(name, name + ":tag", init_args,
                               backend_config, controller_name)
def __init__(self):
    """Construct the wrapped replica worker for this backend.

    NOTE(review): `func_or_class`, `name`, `init_args`, `backend_config`
    and `controller_name` are free names expected to be supplied by the
    enclosing scope — confirm against the caller.
    """
    replica_class = create_backend_replica(func_or_class)
    replica_tag = name + ":tag"
    self.worker = replica_class(name, replica_tag, init_args,
                                backend_config, controller_name)