def test_with_proto():
    """A DeploymentConfig must survive a proto-bytes round trip unchanged."""
    # Scalar options round trip.
    scalar_config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16)
    restored = DeploymentConfig.from_proto_bytes(scalar_config.to_proto_bytes())
    assert restored == scalar_config

    # Arbitrary (pickled) Python objects in user_config round trip too.
    object_config = DeploymentConfig(user_config={"python": ("native", ["objects"])})
    restored = DeploymentConfig.from_proto_bytes(object_config.to_proto_bytes())
    assert restored == object_config
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config: ReplicaConfig,
    version: Optional[str],
    prev_version: Optional[str],
    route_prefix: Optional[str],
    deployer_job_id: "ray._raylet.JobID",
) -> Tuple[Optional[GoalId], bool]:
    """Deploy (or redeploy) a deployment and update its route.

    Validates `prev_version` against the currently-deployed version,
    applies autoscaling defaults, registers the deployment with the
    deployment state manager, and updates the endpoint route if one was
    given.

    Returns:
        (goal_id, updating) as produced by the deployment state manager.

    Raises:
        ValueError: if `prev_version` is given but no deployment exists,
            or it does not match the existing deployment's version.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    config = DeploymentConfig.from_proto_bytes(deployment_config_proto_bytes)

    # Optimistic-concurrency check: only proceed if the caller's view of
    # the current version matches reality.
    if prev_version is not None:
        existing = self.deployment_state_manager.get_deployment(name)
        if existing is None or not existing.version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment."
            )
        if existing.version != prev_version:
            raise ValueError(
                f"prev_version '{prev_version}' "
                "does not match with the existing "
                f"version '{existing.version}'."
            )

    autoscaling = config.autoscaling_config
    if autoscaling is None:
        policy = None
    else:
        # TODO: is this the desired behaviour? Should this be a setting?
        config.num_replicas = autoscaling.min_replicas
        policy = BasicAutoscalingPolicy(autoscaling)

    info = DeploymentInfo(
        actor_name=name,
        serialized_deployment_def=replica_config.serialized_deployment_def,
        version=version,
        deployment_config=config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    goal_id, updating = self.deployment_state_manager.deploy(name, info)

    if route_prefix is not None:
        self.endpoint_state.update_endpoint(name, EndpointInfo(route=route_prefix))

    return goal_id, updating
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config_proto_bytes: bytes,
    route_prefix: Optional[str],
    deployer_job_id: Union["ray._raylet.JobID", bytes],
) -> bool:
    """Deploy (or redeploy) a deployment from serialized protobuf configs.

    Decodes both config protos, applies autoscaling defaults, normalizes
    `deployer_job_id` (which may arrive as raw bytes), registers the
    deployment, and updates or deletes the endpoint route.

    Returns:
        Whether the deployment is being updated, per the deployment state
        manager.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    config = DeploymentConfig.from_proto_bytes(deployment_config_proto_bytes)
    version = config.version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, config.needs_pickle()
    )

    autoscaling = config.autoscaling_config
    if autoscaling is None:
        policy = None
    else:
        # TODO: is this the desired behaviour? Should this be a setting?
        config.num_replicas = autoscaling.min_replicas
        policy = BasicAutoscalingPolicy(autoscaling)

    # Callers may pass the job id as little-endian bytes; normalize to JobID.
    if isinstance(deployer_job_id, bytes):
        deployer_job_id = ray.JobID.from_int(
            int.from_bytes(deployer_job_id, "little")
        )

    info = DeploymentInfo(
        actor_name=name,
        version=version,
        deployment_config=config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    updating = self.deployment_state_manager.deploy(name, info)

    if route_prefix is None:
        self.endpoint_state.delete_endpoint(name)
    else:
        self.endpoint_state.update_endpoint(name, EndpointInfo(route=route_prefix))

    return updating
def test_zero_default_proto():
    """Options set to zero (the protobuf default value) must retain their
    original value after serialization and deserialization."""
    original = DeploymentConfig(
        autoscaling_config={
            "min_replicas": 1,
            "max_replicas": 2,
            "smoothing_factor": 0.123,
            "downscale_delay_s": 0,
        }
    )
    restored = DeploymentConfig.from_proto_bytes(original.to_proto_bytes())
    delay_s = restored.autoscaling_config.downscale_delay_s
    assert delay_s == 0

    # Guard against a spurious pass: 0 must actually differ from the
    # field's default value, otherwise the assertion above proves nothing.
    assert delay_s != AutoscalingConfig().downscale_delay_s
async def __init__(
    self,
    deployment_name,
    replica_tag,
    serialized_deployment_def: bytes,
    serialized_init_args: bytes,
    serialized_init_kwargs: bytes,
    deployment_config_proto_bytes: bytes,
    version: DeploymentVersion,
    controller_name: str,
    detached: bool,
):
    """Set up a replica actor: resolve the deployment body and prepare the
    (deferred) user-code initializer.

    The heavy user-code initialization is NOT run here; it is captured in
    the `initialize_replica` closure stored on `self._initialize_replica`
    so the actor can be reached (e.g. for allocation checks) before user
    code finishes initializing.

    Args:
        deployment_name: Name of the deployment this replica serves.
        replica_tag: Unique id of this replica; also used as logger id.
        serialized_deployment_def: cloudpickled function/class, or a
            cloudpickled import-path string to one.
        serialized_init_args: cloudpickled positional args for the
            user class `__init__` (unused for function deployments).
        serialized_init_kwargs: cloudpickled keyword args for the
            user class `__init__` (unused for function deployments).
        deployment_config_proto_bytes: serialized DeploymentConfig proto.
        version: code/config version of this replica.
        controller_name: actor name of the Serve controller; must be
            non-empty (asserted below).
        detached: unused in the visible body — presumably lifetime-related;
            TODO(review) confirm against callers.
    """
    # Route this replica's logs under the deployment/replica identifiers.
    configure_component_logger(
        component_type="deployment",
        component_name=deployment_name,
        component_id=replica_tag,
    )

    deployment_def = cloudpickle.loads(serialized_deployment_def)

    if isinstance(deployment_def, str):
        # The deployment body was given as an import path ("module.attr").
        import_path = deployment_def
        module_name, attr_name = parse_import_path(import_path)
        deployment_def = getattr(import_module(module_name), attr_name)
        # For ray or serve decorated class or function, strip to return
        # original body
        if isinstance(deployment_def, RemoteFunction):
            deployment_def = deployment_def._function
        elif isinstance(deployment_def, ActorClass):
            deployment_def = deployment_def.__ray_metadata__.modified_class
        elif isinstance(deployment_def, Deployment):
            logger.warning(
                f'The import path "{import_path}" contains a '
                "decorated Serve deployment. The decorator's settings "
                "are ignored when deploying via import path.")
            deployment_def = deployment_def.func_or_class

    init_args = cloudpickle.loads(serialized_init_args)
    init_kwargs = cloudpickle.loads(serialized_init_kwargs)

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    # Function vs. class deployments take different init paths below.
    if inspect.isfunction(deployment_def):
        is_function = True
    elif inspect.isclass(deployment_def):
        is_function = False
    else:
        assert False, (
            "deployment_def must be function, class, or "
            "corresponding import path. Instead, it's type was "
            f"{type(deployment_def)}.")

    # Set the controller name so that serve.connect() in the user's
    # code will connect to the instance that this deployment is running
    # in.
    ray.serve.context.set_internal_replica_context(
        deployment_name,
        replica_tag,
        controller_name,
        servable_object=None,
    )

    assert controller_name, "Must provide a valid controller_name"

    controller_handle = ray.get_actor(controller_name,
                                      namespace=SERVE_NAMESPACE)

    # This closure initializes user code and finalizes replica
    # startup. By splitting the initialization step like this,
    # we can already access this actor before the user code
    # has finished initializing.
    # The supervising state manager can then wait
    # for allocation of this replica by using the `is_allocated`
    # method. After that, it calls `reconfigure` to trigger
    # user code initialization.
    async def initialize_replica():
        if is_function:
            _callable = deployment_def
        else:
            # This allows deployments to define an async __init__
            # method (required for FastAPI).
            _callable = deployment_def.__new__(deployment_def)
            await sync_to_async(_callable.__init__)(*init_args,
                                                    **init_kwargs)
        # Setting the context again to update the servable_object.
        ray.serve.context.set_internal_replica_context(
            deployment_name,
            replica_tag,
            controller_name,
            servable_object=_callable,
        )
        self.replica = RayServeReplica(
            _callable,
            deployment_name,
            replica_tag,
            deployment_config,
            deployment_config.user_config,
            version,
            is_function,
            controller_handle,
        )

    # Is it fine that replica is None here?
    # Should we add a check in all methods that use self.replica
    # or, alternatively, create an async get_replica() method?
    self.replica = None
    self._initialize_replica = initialize_replica
async def __init__(
    self,
    deployment_name,
    replica_tag,
    init_args,
    init_kwargs,
    deployment_config_proto_bytes: bytes,
    version: DeploymentVersion,
    controller_name: str,
    controller_namespace: str,
    detached: bool,
):
    """Set up a replica actor and prepare the deferred user-code initializer.

    Mirrors the other replica `__init__` in this file, but takes already
    deserialized `init_args`/`init_kwargs` and an explicit controller
    namespace.

    NOTE(review): `import_path` and `serialized_deployment_def` are used
    below but are neither parameters of this method nor defined anywhere
    visible in this chunk -- as written, the first statement would raise
    NameError. They are presumably meant to be parameters (compare the
    sibling `__init__`, which accepts `serialized_deployment_def`); the
    correct fix depends on callers outside this view, so it is flagged
    rather than changed here.
    """
    if import_path is not None:
        # Resolve the deployment body from an "module.attr" import path.
        module_name, attr_name = parse_import_path(import_path)
        deployment_def = getattr(import_module(module_name), attr_name)
    else:
        deployment_def = cloudpickle.loads(serialized_deployment_def)

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    # Function vs. class deployments take different init paths below.
    if inspect.isfunction(deployment_def):
        is_function = True
    elif inspect.isclass(deployment_def):
        is_function = False
    else:
        assert False, (
            "deployment_def must be function, class, or "
            "corresponding import path. Instead, it's type was "
            f"{type(deployment_def)}.")

    # Set the controller name so that serve.connect() in the user's
    # code will connect to the instance that this deployment is running
    # in.
    ray.serve.api._set_internal_replica_context(
        deployment_name,
        replica_tag,
        controller_name,
        controller_namespace,
        servable_object=None,
    )

    assert controller_name, "Must provide a valid controller_name"

    controller_handle = ray.get_actor(controller_name,
                                      namespace=controller_namespace)

    # This closure initializes user code and finalizes replica
    # startup. By splitting the initialization step like this,
    # we can already access this actor before the user code
    # has finished initializing.
    # The supervising state manager can then wait
    # for allocation of this replica by using the `is_allocated`
    # method. After that, it calls `reconfigure` to trigger
    # user code initialization.
    async def initialize_replica():
        if is_function:
            _callable = deployment_def
        else:
            # This allows deployments to define an async __init__
            # method (required for FastAPI).
            _callable = deployment_def.__new__(deployment_def)
            await sync_to_async(_callable.__init__)(*init_args,
                                                    **init_kwargs)
        # Setting the context again to update the servable_object.
        ray.serve.api._set_internal_replica_context(
            deployment_name,
            replica_tag,
            controller_name,
            controller_namespace,
            servable_object=_callable,
        )
        self.replica = RayServeReplica(
            _callable,
            deployment_name,
            replica_tag,
            deployment_config,
            deployment_config.user_config,
            version,
            is_function,
            controller_handle,
        )

    # Is it fine that replica is None here?
    # Should we add a check in all methods that use self.replica
    # or, alternatively, create an async get_replica() method?
    self.replica = None
    self._initialize_replica = initialize_replica