예제 #1
0
    def deploy(
            self,
            name: str,
            deployment_def: Union[Callable, Type[Callable], str],
            init_args: Tuple[Any],
            init_kwargs: Dict[Any, Any],
            ray_actor_options: Optional[Dict] = None,
            config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
            version: Optional[str] = None,
            prev_version: Optional[str] = None,
            route_prefix: Optional[str] = None,
            url: Optional[str] = None,
            _blocking: Optional[bool] = True) -> Optional[GoalId]:
        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}

        curr_job_env = ray.get_runtime_context().runtime_env
        if "runtime_env" in ray_actor_options:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", curr_job_env.get("working_dir"))
        else:
            ray_actor_options["runtime_env"] = curr_job_env

        replica_config = ReplicaConfig(
            deployment_def,
            init_args=init_args,
            init_kwargs=init_kwargs,
            ray_actor_options=ray_actor_options)

        if isinstance(config, dict):
            deployment_config = DeploymentConfig.parse_obj(config)
        elif isinstance(config, DeploymentConfig):
            deployment_config = config
        else:
            raise TypeError(
                "config must be a DeploymentConfig or a dictionary.")

        if deployment_config.autoscaling_config is not None and \
            deployment_config.max_concurrent_queries < deployment_config. \
                autoscaling_config.target_num_ongoing_requests_per_replica:
            logger.warning("Autoscaling will never happen, "
                           "because 'max_concurrent_queries' is less than "
                           "'target_num_ongoing_requests_per_replica' now.")

        goal_id, updating = ray.get(
            self._controller.deploy.remote(name,
                                           deployment_config.to_proto_bytes(),
                                           replica_config, version,
                                           prev_version, route_prefix,
                                           ray.get_runtime_context().job_id))

        tag = f"component=serve deployment={name}"

        if updating:
            msg = f"Updating deployment '{name}'"
            if version is not None:
                msg += f" to version '{version}'"
            logger.info(f"{msg}. {tag}")
        else:
            logger.info(f"Deployment '{name}' is already at version "
                        f"'{version}', not updating. {tag}")

        if _blocking:
            self._wait_for_goal(goal_id)

            if url is not None:
                url_part = f" at `{url}`"
            else:
                url_part = ""
            logger.info(
                f"Deployment '{name}{':'+version if version else ''}' is ready"
                f"{url_part}. {tag}")
        else:
            return goal_id
예제 #2
0
    def get_deploy_args(
        self,
        name: str,
        deployment_def: Union[Callable, Type[Callable], str],
        init_args: Tuple[Any],
        init_kwargs: Dict[Any, Any],
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
        version: Optional[str] = None,
        route_prefix: Optional[str] = None,
    ) -> Dict:
        """
        Takes a deployment's configuration, and returns the arguments needed
        for the controller to deploy it.
        """

        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}

        curr_job_env = ray.get_runtime_context().runtime_env
        if "runtime_env" in ray_actor_options:
            # It is illegal to set field working_dir to None.
            if curr_job_env.get("working_dir") is not None:
                ray_actor_options["runtime_env"].setdefault(
                    "working_dir", curr_job_env.get("working_dir"))
        else:
            ray_actor_options["runtime_env"] = curr_job_env

        replica_config = ReplicaConfig.create(
            deployment_def,
            init_args=init_args,
            init_kwargs=init_kwargs,
            ray_actor_options=ray_actor_options,
        )

        if isinstance(config, dict):
            deployment_config = DeploymentConfig.parse_obj(config)
        elif isinstance(config, DeploymentConfig):
            deployment_config = config
        else:
            raise TypeError(
                "config must be a DeploymentConfig or a dictionary.")

        deployment_config.version = version

        if (deployment_config.autoscaling_config is not None
                and deployment_config.max_concurrent_queries <
                deployment_config.autoscaling_config.
                target_num_ongoing_requests_per_replica  # noqa: E501
            ):
            logger.warning("Autoscaling will never happen, "
                           "because 'max_concurrent_queries' is less than "
                           "'target_num_ongoing_requests_per_replica' now.")

        controller_deploy_args = {
            "name": name,
            "deployment_config_proto_bytes":
            deployment_config.to_proto_bytes(),
            "replica_config_proto_bytes": replica_config.to_proto_bytes(),
            "route_prefix": route_prefix,
            "deployer_job_id": ray.get_runtime_context().job_id,
        }

        return controller_deploy_args