def test_redeploy_start_time(serve_instance):
    """Verify that a redeploy keeps the deployment's original start time."""
    controller = serve.context._global_client._controller

    def fetch_start_time_ms():
        # Ask the controller for the "test" deployment and decode the
        # protobuf reply into a DeploymentInfo.
        serialized = ray.get(controller.get_deployment_info.remote("test"))
        route_proto = DeploymentRoute.FromString(serialized)
        info = DeploymentInfo.from_proto(route_proto.deployment_info)
        return info.start_time_ms

    @serve.deployment
    def test(_):
        return "1"

    serve.run(test.bind())
    start_time_ms_1 = fetch_start_time_ms()

    # Let the clock advance so a refreshed timestamp would be detectable.
    time.sleep(0.1)

    # Redeploy under the same name with a different body.
    @serve.deployment
    def test(_):
        return "2"

    serve.run(test.bind())
    start_time_ms_2 = fetch_start_time_ms()

    assert start_time_ms_1 == start_time_ms_2
def get_deployment_info(self, name: str) -> Tuple[DeploymentInfo, str]:
    """Fetch one deployment's info and route from the controller.

    Args:
        name: Name of the deployment to look up.

    Returns:
        A ``(deployment_info, route)`` tuple. ``route`` is ``None`` when the
        controller reports an empty route string.
    """
    serialized = ray.get(self._controller.get_deployment_info.remote(name))
    route_proto = DeploymentRoute.FromString(serialized)

    info = DeploymentInfo.from_proto(route_proto.deployment_info)
    # An empty string in the protobuf reply is surfaced as None.
    if route_proto.route != "":
        route = route_proto.route
    else:
        route = None
    return (info, route)
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config: ReplicaConfig,
    version: Optional[str],
    prev_version: Optional[str],
    route_prefix: Optional[str],
    deployer_job_id: "ray._raylet.JobID",
) -> Tuple[Optional[GoalId], bool]:
    """Create or update the deployment ``name``.

    Args:
        name: Deployment name; also used as the actor name.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``.
        replica_config: Replica configuration for the deployment.
        version: Version to record for this deployment.
        prev_version: If given, the currently deployed version must equal it.
        route_prefix: Route to register for the deployment; must start
            with "/" when given.
        deployer_job_id: ID of the job performing the deploy.

    Returns:
        The ``(goal_id, updating)`` pair returned by the deployment state
        manager.

    Raises:
        ValueError: If ``prev_version`` is given but no versioned deployment
            exists, or the existing version differs from ``prev_version``.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )

    if prev_version is not None:
        existing = self.deployment_state_manager.get_deployment(name)
        if existing is None or not existing.version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment."
            )
        if existing.version != prev_version:
            raise ValueError(
                f"prev_version '{prev_version}' "
                "does not match with the existing "
                f"version '{existing.version}'."
            )

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    serialized_def = replica_config.serialized_deployment_def
    now_ms = int(time.time() * 1000)
    deployment_info = DeploymentInfo(
        actor_name=name,
        serialized_deployment_def=serialized_def,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=now_ms,
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    goal_id, updating = self.deployment_state_manager.deploy(
        name, deployment_info
    )

    # The endpoint is only touched when a route prefix was supplied.
    if route_prefix is not None:
        self.endpoint_state.update_endpoint(
            name, EndpointInfo(route=route_prefix)
        )
    return goal_id, updating
def list_deployments(self) -> Dict[str, Tuple[DeploymentInfo, str]]:
    """Fetch every deployment known to the controller.

    Returns:
        A mapping from deployment name to ``(deployment_info, route)``.
        ``route`` is ``None`` when the controller reports an empty route
        string.
    """
    serialized = ray.get(self._controller.list_deployments.remote())
    route_list = DeploymentRouteList.FromString(serialized)

    deployments = {}
    for entry in route_list.deployment_routes:
        info_proto = entry.deployment_info
        # An empty string in the protobuf reply is surfaced as None.
        route = entry.route if entry.route != "" else None
        deployments[info_proto.name] = (
            DeploymentInfo.from_proto(info_proto),
            route,
        )
    return deployments
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config_proto_bytes: bytes,
    route_prefix: Optional[str],
    deployer_job_id: Union["ray._raylet.JobID", bytes],
) -> bool:
    """Create or update the deployment ``name`` from serialized configs.

    Args:
        name: Deployment name; also used as the actor name.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``; the
            deployment version is read from it.
        replica_config_proto_bytes: Serialized ``ReplicaConfig``.
        route_prefix: Route to register for the deployment; must start
            with "/" when given. When ``None`` the deployment's endpoint
            is deleted.
        deployer_job_id: Job ID of the deployer, either as a ``JobID`` or
            as little-endian bytes.

    Returns:
        The ``updating`` value returned by the deployment state manager.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )
    version = deployment_config.version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, deployment_config.needs_pickle()
    )

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    # A job ID passed as raw bytes is decoded (little-endian) into a JobID.
    if isinstance(deployer_job_id, bytes):
        job_id_int = int.from_bytes(deployer_job_id, "little")
        deployer_job_id = ray.JobID.from_int(job_id_int)

    now_ms = int(time.time() * 1000)
    deployment_info = DeploymentInfo(
        actor_name=name,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=now_ms,
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    updating = self.deployment_state_manager.deploy(name, deployment_info)

    # No route prefix means the deployment should not have an endpoint.
    if route_prefix is None:
        self.endpoint_state.delete_endpoint(name)
    else:
        self.endpoint_state.update_endpoint(
            name, EndpointInfo(route=route_prefix)
        )
    return updating
def get_deployment_start_time(controller: ServeController, deployment: Deployment):
    """Look up ``start_time_ms`` for ``deployment`` via the controller.

    Lists all deployments from the controller, indexes them by name and
    returns the start time of the entry matching ``deployment.name``.
    Raises ``KeyError`` if no listed deployment has that name.
    """
    serialized = ray.get(controller.list_deployments.remote())
    route_list = DeploymentRouteList.FromString(serialized)

    info_and_route_by_name = {}
    for entry in route_list.deployment_routes:
        info_proto = entry.deployment_info
        # An empty string in the protobuf reply is surfaced as None.
        route = entry.route if entry.route != "" else None
        info_and_route_by_name[info_proto.name] = (
            DeploymentInfo.from_proto(info_proto),
            route,
        )

    info, _unused_route = info_and_route_by_name[deployment.name]
    return info.start_time_ms
def __init__(
    self,
    controller_handle: ActorHandle,
    deployment_name: EndpointTag,
    handle_options: Optional[HandleOptions] = None,
    *,
    _router: Optional[Router] = None,
    _internal_pickled_http_request: bool = False,
):
    """Set up the handle's metrics, router and optional metrics pusher.

    Args:
        controller_handle: Actor handle of the controller.
        deployment_name: Name of the deployment this handle targets.
        handle_options: Options for the handle; a default ``HandleOptions``
            is used when falsy.
        _router: Router to use; one is built via ``_make_router`` when falsy.
        _internal_pickled_http_request: Stored as ``_pickled_http_request``.
    """
    self.controller_handle = controller_handle
    self.deployment_name = deployment_name
    self.handle_options = handle_options or HandleOptions()
    self.handle_tag = f"{self.deployment_name}#{get_random_letters()}"
    self._pickled_http_request = _internal_pickled_http_request

    counter_description = (
        "The number of handle.remote() calls that have been "
        "made on this handle."
    )
    self.request_counter = metrics.Counter(
        "serve_handle_request_counter",
        description=counter_description,
        tag_keys=("handle", "deployment"),
    )
    default_tags = {
        "handle": self.handle_tag,
        "deployment": self.deployment_name,
    }
    self.request_counter.set_default_tags(default_tags)

    self.router: Router = _router or self._make_router()

    # Query the controller for this deployment's info to learn whether
    # it has an autoscaling config.
    serialized = ray.get(
        self.controller_handle.get_deployment_info.remote(self.deployment_name)
    )
    route_proto = DeploymentRoute.FromString(serialized)
    info = DeploymentInfo.from_proto(route_proto.deployment_info)

    self._stop_event: Optional[threading.Event] = None
    self._pusher: Optional[threading.Thread] = None
    push_metrics = self.controller_handle.record_handle_metrics.remote

    # The metrics pusher is only started when an autoscaling config is set.
    if info.deployment_config.autoscaling_config:
        self._stop_event = threading.Event()
        self._pusher = start_metrics_pusher(
            interval_s=HANDLE_METRIC_PUSH_INTERVAL_S,
            collection_callback=self._collect_handle_queue_metrics,
            metrics_process_func=push_metrics,
            stop_event=self._stop_event,
        )