Example #1
def test_with_proto():
    # Test roundtrip
    config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16)
    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())

    # Test user_config object
    config = DeploymentConfig(user_config={"python": ("native", ["objects"])})
    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())
Example #2
    def from_proto(cls, proto: DeploymentInfoProto):
        deployment_config = (
            DeploymentConfig.from_proto(proto.deployment_config)
            if proto.deployment_config
            else None
        )
        data = {
            "deployment_config": deployment_config,
            "replica_config": ReplicaConfig.from_proto(
                proto.replica_config,
                deployment_config.deployment_language
                if deployment_config
                else DeploymentLanguage.PYTHON,
            ),
            "start_time_ms": proto.start_time_ms,
            "actor_name": proto.actor_name if proto.actor_name != "" else None,
            "serialized_deployment_def": (
                proto.serialized_deployment_def
                if proto.serialized_deployment_def != b""
                else None
            ),
            "version": proto.version if proto.version != "" else None,
            "end_time_ms": proto.end_time_ms if proto.end_time_ms != 0 else None,
            "deployer_job_id": ray.get_runtime_context().job_id,
        }

        return cls(**data)
Example #3
def test_invalid_use_sync_handle():
    deployment = Deployment(
        Actor,
        "test",
        DeploymentConfig(),
        _internal=True,
    )
    with pytest.raises(
        ValueError,
        match=f"{USE_SYNC_HANDLE_KEY} should only be set with a boolean value",
    ):
        _ = DeploymentNode(
            Actor,
            "test",
            [],
            {},
            {},
            other_args_to_resolve={USE_SYNC_HANDLE_KEY: {"options_a": "hii"}},
        )
    with pytest.raises(
        ValueError,
        match=f"{USE_SYNC_HANDLE_KEY} should only be set with a boolean value",
    ):
        _ = DeploymentMethodNode(
            deployment,
            "method",
            [],
            {},
            {},
            other_args_to_resolve={
                USE_SYNC_HANDLE_KEY: None,
            },
        )
Example #4
    def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "ray._raylet.JobID",
    ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        deployment_config = DeploymentConfig.from_proto_bytes(
            deployment_config_proto_bytes)

        if prev_version is not None:
            existing_deployment_info = self.deployment_state_manager.get_deployment(
                name)
            if existing_deployment_info is None or not existing_deployment_info.version:
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing_deployment_info.version != prev_version:
                raise ValueError(
                    f"prev_version '{prev_version}' "
                    "does not match with the existing "
                    f"version '{existing_deployment_info.version}'.")

        autoscaling_config = deployment_config.autoscaling_config
        if autoscaling_config is not None:
            # TODO: is this the desired behaviour? Should this be a setting?
            deployment_config.num_replicas = autoscaling_config.min_replicas

            autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
        else:
            autoscaling_policy = None

        deployment_info = DeploymentInfo(
            actor_name=name,
            serialized_deployment_def=replica_config.serialized_deployment_def,
            version=version,
            deployment_config=deployment_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000),
            autoscaling_policy=autoscaling_policy,
        )
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # Is this the desired behaviour?

        goal_id, updating = self.deployment_state_manager.deploy(
            name, deployment_info)

        if route_prefix is not None:
            endpoint_info = EndpointInfo(route=route_prefix)
            self.endpoint_state.update_endpoint(name, endpoint_info)

        return goal_id, updating
Example #5
    def from_proto(cls, proto: DeploymentInfoProto):
        deployment_config = (
            DeploymentConfig.from_proto(proto.deployment_config)
            if proto.deployment_config
            else None
        )
        data = {
            "deployment_config": deployment_config,
            "replica_config": ReplicaConfig.from_proto(
                proto.replica_config,
                deployment_config.needs_pickle() if deployment_config else True,
            ),
            "start_time_ms": proto.start_time_ms,
            "actor_name": proto.actor_name if proto.actor_name != "" else None,
            "version": proto.version if proto.version != "" else None,
            "end_time_ms": proto.end_time_ms if proto.end_time_ms != 0 else None,
            "deployer_job_id": ray.get_runtime_context().job_id,
        }

        return cls(**data)
Example #6
def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    if s.ray_actor_options is None:
        ray_actor_options = None
    else:
        ray_actor_options = s.ray_actor_options.dict(exclude_unset=True)

    config = DeploymentConfig.from_default(
        ignore_none=True,
        num_replicas=s.num_replicas,
        user_config=s.user_config,
        max_concurrent_queries=s.max_concurrent_queries,
        autoscaling_config=s.autoscaling_config,
        graceful_shutdown_wait_loop_s=s.graceful_shutdown_wait_loop_s,
        graceful_shutdown_timeout_s=s.graceful_shutdown_timeout_s,
        health_check_period_s=s.health_check_period_s,
        health_check_timeout_s=s.health_check_timeout_s,
    )

    return Deployment(
        func_or_class=s.import_path,
        name=s.name,
        config=config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=ray_actor_options,
        _internal=True,
    )
Example #7
    def __init__(
        self,
        func_body: Union[Callable, str],
        deployment_name,
        func_args,
        func_kwargs,
        func_options,
        other_args_to_resolve=None,
    ):
        self._body = func_body
        self._deployment_name = deployment_name
        super().__init__(
            func_args,
            func_kwargs,
            func_options,
            other_args_to_resolve=other_args_to_resolve,
        )
        if "deployment_schema" in self._bound_other_args_to_resolve:
            deployment_schema: DeploymentSchema = self._bound_other_args_to_resolve[
                "deployment_schema"
            ]
            deployment_shell = schema_to_deployment(deployment_schema)

            # Prefer user specified name to override the generated one.
            if (
                inspect.isfunction(func_body)
                and deployment_shell.name != func_body.__name__
            ):
                self._deployment_name = deployment_shell.name

            # Set the route prefix: prefer the one the user supplied,
            # otherwise default to /{deployment_name}.
            if (
                deployment_shell.route_prefix is None
                or deployment_shell.route_prefix != f"/{deployment_shell.name}"
            ):
                route_prefix = deployment_shell.route_prefix
            else:
                route_prefix = f"/{deployment_name}"

            self._deployment = deployment_shell.options(
                func_or_class=func_body,
                name=self._deployment_name,
                init_args=(),
                init_kwargs={},
                route_prefix=route_prefix,
            )
        else:
            self._deployment: Deployment = Deployment(
                func_body,
                deployment_name,
                DeploymentConfig(),
                init_args=tuple(),
                init_kwargs=dict(),
                ray_actor_options=func_options,
                _internal=True,
            )
        # TODO (jiaodong): Polish with async handle support later
        self._deployment_handle = RayServeLazySyncHandle(self._deployment.name)
Example #8
def test_zero_default_proto():
    # Test that options set to zero (protobuf default value) still retain their
    # original value after being serialized and deserialized.
    config = DeploymentConfig(
        autoscaling_config={
            "min_replicas": 1,
            "max_replicas": 2,
            "smoothing_factor": 0.123,
            "downscale_delay_s": 0
        })
    serialized_config = config.to_proto_bytes()
    deserialized_config = DeploymentConfig.from_proto_bytes(serialized_config)
    new_delay_s = deserialized_config.autoscaling_config.downscale_delay_s
    assert new_delay_s == 0

    # Check that this test is not spuriously passing.
    default_downscale_delay_s = AutoscalingConfig().downscale_delay_s
    assert new_delay_s != default_downscale_delay_s
Example #9
    def test_from_default(self, ignore_none):
        """Check from_default() method behavior."""

        # Valid parameters
        dc = DeploymentConfig.from_default(ignore_none=ignore_none,
                                           num_replicas=5,
                                           is_cross_language=True)
        assert dc.num_replicas == 5
        assert dc.is_cross_language is True

        # Invalid parameters should raise TypeError
        with pytest.raises(TypeError):
            DeploymentConfig.from_default(ignore_none=ignore_none,
                                          num_replicas=5,
                                          is_xlang=True)

        # Validation should still be performed
        with pytest.raises(ValidationError):
            DeploymentConfig.from_default(ignore_none=ignore_none,
                                          num_replicas="hello world")
Example #10
    def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config_proto_bytes: bytes,
        route_prefix: Optional[str],
        deployer_job_id: Union["ray._raylet.JobID", bytes],
    ) -> bool:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        deployment_config = DeploymentConfig.from_proto_bytes(
            deployment_config_proto_bytes
        )
        version = deployment_config.version
        replica_config = ReplicaConfig.from_proto_bytes(
            replica_config_proto_bytes, deployment_config.needs_pickle()
        )

        autoscaling_config = deployment_config.autoscaling_config
        if autoscaling_config is not None:
            # TODO: is this the desired behaviour? Should this be a setting?
            deployment_config.num_replicas = autoscaling_config.min_replicas

            autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
        else:
            autoscaling_policy = None
        if isinstance(deployer_job_id, bytes):
            deployer_job_id = ray.JobID.from_int(
                int.from_bytes(deployer_job_id, "little")
            )
        deployment_info = DeploymentInfo(
            actor_name=name,
            version=version,
            deployment_config=deployment_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000),
            autoscaling_policy=autoscaling_policy,
        )
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # Is this the desired behaviour?
        updating = self.deployment_state_manager.deploy(name, deployment_info)

        if route_prefix is not None:
            endpoint_info = EndpointInfo(route=route_prefix)
            self.endpoint_state.update_endpoint(name, endpoint_info)
        else:
            self.endpoint_state.delete_endpoint(name)

        return updating
Example #11
    def from_json(cls, input_json, object_hook=None):
        assert input_json[DAGNODE_TYPE_KEY] == DeploymentMethodNode.__name__
        return cls(
            Deployment(
                input_json["import_path"],
                input_json["deployment_name"],
                # TODO: (jiaodong) Support deployment config from user input
                DeploymentConfig(),
                init_args=input_json["args"],
                init_kwargs=input_json["kwargs"],
                ray_actor_options=input_json["options"],
                _internal=True,
            ),
            input_json["deployment_method_name"],
            input_json["args"],
            input_json["kwargs"],
            input_json["options"],
            other_args_to_resolve=input_json["other_args_to_resolve"],
        )
Example #12
    def test_from_default_ignore_none(self):
        """Check from_default()'s ignore_none parameter"""

        default = DeploymentConfig()

        # Valid parameter with None passed in should be ignored
        dc = DeploymentConfig.from_default(ignore_none=True, num_replicas=None)

        # Invalid parameter should raise TypeError no matter what
        with pytest.raises(TypeError):
            DeploymentConfig.from_default(ignore_none=True, fake=5)
        with pytest.raises(TypeError):
            DeploymentConfig.from_default(ignore_none=False, fake=5)

        # Validators should run no matter what
        dc = DeploymentConfig.from_default(ignore_none=True,
                                           max_concurrent_queries=None)
        assert dc.max_concurrent_queries == default.max_concurrent_queries
        dc = DeploymentConfig.from_default(ignore_none=False,
                                           max_concurrent_queries=None)
        assert dc.max_concurrent_queries is not None
        assert dc.max_concurrent_queries == default.max_concurrent_queries
Example #13
def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    """Creates a deployment with parameters specified in schema.

    The returned deployment CANNOT be deployed immediately. Its func_or_class
    value is an empty string (""), which is not a valid import path. The
    func_or_class value must be overwritten with a valid function or class
    before the deployment can be deployed.
    """

    if s.ray_actor_options is None:
        ray_actor_options = None
    else:
        ray_actor_options = s.ray_actor_options.dict(exclude_unset=True)

    config = DeploymentConfig.from_default(
        ignore_none=True,
        num_replicas=s.num_replicas,
        user_config=s.user_config,
        max_concurrent_queries=s.max_concurrent_queries,
        autoscaling_config=s.autoscaling_config,
        graceful_shutdown_wait_loop_s=s.graceful_shutdown_wait_loop_s,
        graceful_shutdown_timeout_s=s.graceful_shutdown_timeout_s,
        health_check_period_s=s.health_check_period_s,
        health_check_timeout_s=s.health_check_timeout_s,
    )

    return Deployment(
        func_or_class="",
        name=s.name,
        config=config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=ray_actor_options,
        _internal=True,
    )
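As the docstring above notes, schema_to_deployment() returns only a shell whose func_or_class must be replaced before deploying. A minimal, hypothetical sketch of that flow follows; the DeploymentSchema constructor arguments and MyApp are illustrative, not taken from the source.

# Hypothetical usage sketch of schema_to_deployment(); the field names follow
# the ones referenced above, but the exact DeploymentSchema constructor is an
# assumption.
class MyApp:
    def __call__(self, request):
        return "ok"

schema = DeploymentSchema(name="my_app", num_replicas=2, route_prefix="/my_app")
shell = schema_to_deployment(schema)              # func_or_class is "" here
deployment = shell.options(func_or_class=MyApp)   # overwrite before deploying
deployment.deploy()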
Example #14
        async def __init__(
            self,
            deployment_name,
            replica_tag,
            serialized_deployment_def: bytes,
            serialized_init_args: bytes,
            serialized_init_kwargs: bytes,
            deployment_config_proto_bytes: bytes,
            version: DeploymentVersion,
            controller_name: str,
            detached: bool,
        ):
            configure_component_logger(
                component_type="deployment",
                component_name=deployment_name,
                component_id=replica_tag,
            )

            deployment_def = cloudpickle.loads(serialized_deployment_def)

            if isinstance(deployment_def, str):
                import_path = deployment_def
                module_name, attr_name = parse_import_path(import_path)
                deployment_def = getattr(import_module(module_name), attr_name)
                # For a Ray- or Serve-decorated class or function, strip the
                # decorator to recover the original body.
                if isinstance(deployment_def, RemoteFunction):
                    deployment_def = deployment_def._function
                elif isinstance(deployment_def, ActorClass):
                    deployment_def = deployment_def.__ray_metadata__.modified_class
                elif isinstance(deployment_def, Deployment):
                    logger.warning(
                        f'The import path "{import_path}" contains a '
                        "decorated Serve deployment. The decorator's settings "
                        "are ignored when deploying via import path.")
                    deployment_def = deployment_def.func_or_class

            init_args = cloudpickle.loads(serialized_init_args)
            init_kwargs = cloudpickle.loads(serialized_init_kwargs)

            deployment_config = DeploymentConfig.from_proto_bytes(
                deployment_config_proto_bytes)

            if inspect.isfunction(deployment_def):
                is_function = True
            elif inspect.isclass(deployment_def):
                is_function = False
            else:
                assert False, (
                    "deployment_def must be function, class, or "
                    "corresponding import path. Instead, it's type was "
                    f"{type(deployment_def)}.")

            # Set the controller name so that serve.connect() in the user's
            # code will connect to the instance that this deployment is running
            # in.
            ray.serve.context.set_internal_replica_context(
                deployment_name,
                replica_tag,
                controller_name,
                servable_object=None,
            )

            assert controller_name, "Must provide a valid controller_name"

            controller_handle = ray.get_actor(controller_name,
                                              namespace=SERVE_NAMESPACE)

            # This closure initializes user code and finalizes replica
            # startup. By splitting the initialization step like this,
            # we can already access this actor before the user code
            # has finished initializing.
            # The supervising state manager can then wait
            # for allocation of this replica by using the `is_allocated`
            # method. After that, it calls `reconfigure` to trigger
            # user code initialization.
            async def initialize_replica():
                if is_function:
                    _callable = deployment_def
                else:
                    # This allows deployments to define an async __init__
                    # method (required for FastAPI).
                    _callable = deployment_def.__new__(deployment_def)
                    await sync_to_async(_callable.__init__)(*init_args,
                                                            **init_kwargs)

                # Setting the context again to update the servable_object.
                ray.serve.context.set_internal_replica_context(
                    deployment_name,
                    replica_tag,
                    controller_name,
                    servable_object=_callable,
                )

                self.replica = RayServeReplica(
                    _callable,
                    deployment_name,
                    replica_tag,
                    deployment_config,
                    deployment_config.user_config,
                    version,
                    is_function,
                    controller_handle,
                )

            # Is it fine that replica is None here?
            # Should we add a check in all methods that use self.replica
            # or, alternatively, create an async get_replica() method?
            self.replica = None
            self._initialize_replica = initialize_replica
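The initialize_replica closure above exists so the replica actor is reachable (e.g. for the is_allocated check) before any user code runs. Below is a stripped-down sketch of that deferred-initialization pattern, with made-up names and none of the Serve-specific wiring.

import asyncio


class LazyReplica:
    """Toy illustration of deferring user-code construction to a closure."""

    def __init__(self, user_cls, *args, **kwargs):
        # Reachable immediately; user code has not been constructed yet.
        self.replica = None

        async def initialize_replica():
            # Heavy user-code construction happens only when requested.
            self.replica = user_cls(*args, **kwargs)

        self._initialize_replica = initialize_replica

    def is_allocated(self) -> bool:
        # Can be answered before initialize_replica() has ever run.
        return True

    async def reconfigure(self):
        if self.replica is None:
            await self._initialize_replica()


# Example: asyncio.run(LazyReplica(dict).reconfigure())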
Example #15
def deployment(
        _func_or_class: Optional[Callable] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
        prev_version: Optional[str] = None,
        num_replicas: Optional[int] = None,
        init_args: Optional[Tuple[Any]] = None,
        init_kwargs: Optional[Dict[Any, Any]] = None,
        route_prefix: Optional[str] = None,
        ray_actor_options: Optional[Dict] = None,
        user_config: Optional[Any] = None,
        max_concurrent_queries: Optional[int] = None,
        _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
        _graceful_shutdown_wait_loop_s: Optional[float] = None,
        _graceful_shutdown_timeout_s: Optional[float] = None
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Args:
        name (Optional[str]): Globally-unique name identifying this deployment.
            If not provided, the name of the class or function will be used.
        version (Optional[str]): Version of the deployment. This is used to
            indicate a code change for the deployment; when it is re-deployed
            with a version change, a rolling update of the replicas will be
            performed. If not provided, every deployment will be treated as a
            new version.
        prev_version (Optional[str]): Version of the existing deployment which
            is used as a precondition for the next deployment. If prev_version
            does not match with the existing deployment's version, the
            deployment will fail. If not provided, deployment procedure will
            not check the existing deployment's version.
        num_replicas (Optional[int]): The number of processes to start up that
            will handle requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Positional args to be passed to the class
            constructor when starting up deployment replicas. These can also be
            passed when you call `.deploy()` on the returned Deployment.
        init_kwargs (Optional[Dict]): Keyword args to be passed to the class
            constructor when starting up deployment replicas. These can also be
            passed when you call `.deploy()` on the returned Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP path
            prefix will be routed to this deployment. Defaults to '/{name}'.
            Routing is done based on longest-prefix match, so if you have
            deployment A with a prefix of '/a' and deployment B with a prefix
            of '/a/b', requests to '/a', '/a/', and '/a/c' go to A and requests
            to '/a/b', '/a/b/', and '/a/b/c' go to B. Routes must not end with
            a '/' unless they're the root (just '/'), which acts as a
            catch-all.
        ray_actor_options (dict): Options to be passed to the Ray actor
            constructor such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable to
            keep track of updates, so it must only contain hashable types, or
            hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of queries
            that will be sent to a replica of this deployment without receiving
            a response. Defaults to 100.

    Example:

    >>> @serve.deployment(name="deployment1", version="v1")
    ... class MyDeployment:
    ...     pass

    >>> MyDeployment.deploy(*init_args)
    >>> MyDeployment.options(num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment
    """

    if num_replicas is not None \
            and _autoscaling_config is not None:
        raise ValueError("Manually setting num_replicas is not allowed when "
                         "_autoscaling_config is provided.")

    config = DeploymentConfig()
    if num_replicas is not None:
        config.num_replicas = num_replicas

    if user_config is not None:
        config.user_config = user_config

    if max_concurrent_queries is not None:
        config.max_concurrent_queries = max_concurrent_queries

    if _autoscaling_config is not None:
        config.autoscaling_config = _autoscaling_config

    if _graceful_shutdown_wait_loop_s is not None:
        config.graceful_shutdown_wait_loop_s = _graceful_shutdown_wait_loop_s

    if _graceful_shutdown_timeout_s is not None:
        config.graceful_shutdown_timeout_s = _graceful_shutdown_timeout_s

    def decorator(_func_or_class):
        return Deployment(
            _func_or_class,
            name if name is not None else _func_or_class.__name__,
            config,
            version=version,
            prev_version=prev_version,
            init_args=init_args,
            init_kwargs=init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    return decorator(_func_or_class) if callable(_func_or_class) else decorator
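A hypothetical sketch of the version/prev_version precondition described in the docstring above (the deployment name and versions are illustrative):

@serve.deployment(name="greeter", version="v2", prev_version="v1")
def greeter(request):
    return "hello"

# Succeeds only if an existing "greeter" deployment is currently at version
# "v1"; otherwise the controller raises a ValueError (see Example #4).
greeter.deploy()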
Example #16
        async def __init__(
            self,
            deployment_name,
            replica_tag,
            init_args,
            init_kwargs,
            deployment_config_proto_bytes: bytes,
            version: DeploymentVersion,
            controller_name: str,
            controller_namespace: str,
            detached: bool,
        ):

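            # NOTE: `serialized_deployment_def` and `import_path` are not
            # parameters of this __init__; in the original source they are
            # presumably captured from the enclosing scope (e.g. a factory
            # function that builds this replica class).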
            if import_path is not None:
                module_name, attr_name = parse_import_path(import_path)
                deployment_def = getattr(import_module(module_name), attr_name)
            else:
                deployment_def = cloudpickle.loads(serialized_deployment_def)

            deployment_config = DeploymentConfig.from_proto_bytes(
                deployment_config_proto_bytes)

            if inspect.isfunction(deployment_def):
                is_function = True
            elif inspect.isclass(deployment_def):
                is_function = False
            else:
                assert False, (
                    "deployment_def must be function, class, or "
                    "corresponding import path. Instead, it's type was "
                    f"{type(deployment_def)}.")

            # Set the controller name so that serve.connect() in the user's
            # code will connect to the instance that this deployment is running
            # in.
            ray.serve.api._set_internal_replica_context(
                deployment_name,
                replica_tag,
                controller_name,
                controller_namespace,
                servable_object=None,
            )

            assert controller_name, "Must provide a valid controller_name"

            controller_handle = ray.get_actor(controller_name,
                                              namespace=controller_namespace)

            # This closure initializes user code and finalizes replica
            # startup. By splitting the initialization step like this,
            # we can already access this actor before the user code
            # has finished initializing.
            # The supervising state manager can then wait
            # for allocation of this replica by using the `is_allocated`
            # method. After that, it calls `reconfigure` to trigger
            # user code initialization.
            async def initialize_replica():
                if is_function:
                    _callable = deployment_def
                else:
                    # This allows deployments to define an async __init__
                    # method (required for FastAPI).
                    _callable = deployment_def.__new__(deployment_def)
                    await sync_to_async(_callable.__init__)(*init_args,
                                                            **init_kwargs)

                # Setting the context again to update the servable_object.
                ray.serve.api._set_internal_replica_context(
                    deployment_name,
                    replica_tag,
                    controller_name,
                    controller_namespace,
                    servable_object=_callable,
                )

                self.replica = RayServeReplica(
                    _callable,
                    deployment_name,
                    replica_tag,
                    deployment_config,
                    deployment_config.user_config,
                    version,
                    is_function,
                    controller_handle,
                )

            # Is it fine that replica is None here?
            # Should we add a check in all methods that use self.replica
            # or, alternatively, create an async get_replica() method?
            self.replica = None
            self._initialize_replica = initialize_replica
Example #17
    def __init__(
        self,
        # For serve structured deployment, deployment body can be import path
        # to the class or function instead.
        func_or_class: Union[Callable, str],
        deployment_name: str,
        deployment_init_args: Tuple[Any],
        deployment_init_kwargs: Dict[str, Any],
        ray_actor_options: Dict[str, Any],
        other_args_to_resolve: Optional[Dict[str, Any]] = None,
    ):
        # Assign instance variables in base class constructor.
        super().__init__(
            deployment_init_args,
            deployment_init_kwargs,
            ray_actor_options,
            other_args_to_resolve=other_args_to_resolve,
        )
        if self._contains_input_node():
            raise ValueError(
                "InputNode handles user dynamic input the the DAG, and "
                "cannot be used as args, kwargs, or other_args_to_resolve "
                "in the DeploymentNode constructor because it is not available "
                "at class construction or binding time.")
        # A Deployment can be passed into other DAGNodes as an init arg. This
        # is a supported pattern in the Ray DAG API: users can instantiate a
        # class and pass the instance as an init arg to other nodes.

        # However, in Ray Serve init args are sent via .remote(), which
        # requires pickling, and DAGNode types are not picklable by design.

        # Thus we need to convert every DeploymentNode used in init args into
        # a deployment handle (executable and picklable) so that the Serve DAG
        # is executable end to end.
        def replace_with_handle(node):
            if isinstance(node, DeploymentNode):
                return node._get_serve_deployment_handle(
                    node._deployment, node._bound_other_args_to_resolve)
            elif isinstance(node,
                            (DeploymentMethodNode, DeploymentFunctionNode)):
                from ray.serve.pipeline.json_serde import DAGNodeEncoder

                serve_dag_root_json = json.dumps(node, cls=DAGNodeEncoder)
                return RayServeDAGHandle(serve_dag_root_json)

        (
            replaced_deployment_init_args,
            replaced_deployment_init_kwargs,
        ) = self.apply_functional(
            [deployment_init_args, deployment_init_kwargs],
            predictate_fn=lambda node: isinstance(node, (
                DeploymentNode, DeploymentMethodNode, DeploymentFunctionNode)),
            apply_fn=replace_with_handle,
        )

        if "deployment_schema" in self._bound_other_args_to_resolve:
            deployment_schema: DeploymentSchema = self._bound_other_args_to_resolve[
                "deployment_schema"]
            deployment_shell = schema_to_deployment(deployment_schema)

            # Prefer user specified name to override the generated one.
            if (inspect.isclass(func_or_class)
                    and deployment_shell.name != func_or_class.__name__):
                deployment_name = deployment_shell.name

            # Set the route prefix: prefer the one the user supplied,
            # otherwise default to /{deployment_name}.
            if (deployment_shell.route_prefix is None
                    or deployment_shell.route_prefix !=
                    f"/{deployment_shell.name}"):
                route_prefix = deployment_shell.route_prefix
            else:
                route_prefix = f"/{deployment_name}"

            self._deployment = deployment_shell.options(
                func_or_class=func_or_class,
                name=deployment_name,
                init_args=replaced_deployment_init_args,
                init_kwargs=replaced_deployment_init_kwargs,
                route_prefix=route_prefix,
            )
        else:
            self._deployment: Deployment = Deployment(
                func_or_class,
                deployment_name,
                # TODO: (jiaodong) Support deployment config from user input
                DeploymentConfig(),
                init_args=replaced_deployment_init_args,
                init_kwargs=replaced_deployment_init_kwargs,
                ray_actor_options=ray_actor_options,
                _internal=True,
            )
        self._deployment_handle: Union[
            RayServeLazySyncHandle, RayServeHandle,
            RayServeSyncHandle] = self._get_serve_deployment_handle(
                self._deployment, other_args_to_resolve)
Example #18
    def get_deploy_args(
        self,
        name: str,
        deployment_def: Union[Callable, Type[Callable], str],
        init_args: Tuple[Any],
        init_kwargs: Dict[Any, Any],
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
        version: Optional[str] = None,
        route_prefix: Optional[str] = None,
    ) -> Dict:
        """
        Takes a deployment's configuration, and returns the arguments needed
        for the controller to deploy it.
        """

        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}

        curr_job_env = ray.get_runtime_context().runtime_env
        if "runtime_env" in ray_actor_options:
            # It is illegal to set the working_dir field to None.
            if curr_job_env.get("working_dir") is not None:
                ray_actor_options["runtime_env"].setdefault(
                    "working_dir", curr_job_env.get("working_dir"))
        else:
            ray_actor_options["runtime_env"] = curr_job_env

        replica_config = ReplicaConfig.create(
            deployment_def,
            init_args=init_args,
            init_kwargs=init_kwargs,
            ray_actor_options=ray_actor_options,
        )

        if isinstance(config, dict):
            deployment_config = DeploymentConfig.parse_obj(config)
        elif isinstance(config, DeploymentConfig):
            deployment_config = config
        else:
            raise TypeError(
                "config must be a DeploymentConfig or a dictionary.")

        deployment_config.version = version

        if (
            deployment_config.autoscaling_config is not None
            and deployment_config.max_concurrent_queries
            < deployment_config.autoscaling_config.target_num_ongoing_requests_per_replica  # noqa: E501
        ):
            logger.warning("Autoscaling will never happen, "
                           "because 'max_concurrent_queries' is less than "
                           "'target_num_ongoing_requests_per_replica' now.")

        controller_deploy_args = {
            "name": name,
            "deployment_config_proto_bytes":
            deployment_config.to_proto_bytes(),
            "replica_config_proto_bytes": replica_config.to_proto_bytes(),
            "route_prefix": route_prefix,
            "deployer_job_id": ray.get_runtime_context().job_id,
        }

        return controller_deploy_args
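For illustration, here is a config that would trigger the warning above, because max_concurrent_queries is smaller than the autoscaler's target (the values are made up):

# Hypothetical config hitting the warning in get_deploy_args():
# max_concurrent_queries (2) < target_num_ongoing_requests_per_replica (5).
config = DeploymentConfig(
    max_concurrent_queries=2,
    autoscaling_config={
        "min_replicas": 1,
        "max_replicas": 4,
        "target_num_ongoing_requests_per_replica": 5,
    },
)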
Example #19
    def deploy(
            self,
            name: str,
            deployment_def: Union[Callable, Type[Callable], str],
            init_args: Tuple[Any],
            init_kwargs: Dict[Any, Any],
            ray_actor_options: Optional[Dict] = None,
            config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
            version: Optional[str] = None,
            prev_version: Optional[str] = None,
            route_prefix: Optional[str] = None,
            url: Optional[str] = None,
            _blocking: Optional[bool] = True) -> Optional[GoalId]:
        if config is None:
            config = {}
        if ray_actor_options is None:
            ray_actor_options = {}

        curr_job_env = ray.get_runtime_context().runtime_env
        if "runtime_env" in ray_actor_options:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", curr_job_env.get("working_dir"))
        else:
            ray_actor_options["runtime_env"] = curr_job_env

        replica_config = ReplicaConfig(
            deployment_def,
            init_args=init_args,
            init_kwargs=init_kwargs,
            ray_actor_options=ray_actor_options)

        if isinstance(config, dict):
            deployment_config = DeploymentConfig.parse_obj(config)
        elif isinstance(config, DeploymentConfig):
            deployment_config = config
        else:
            raise TypeError(
                "config must be a DeploymentConfig or a dictionary.")

        if (
            deployment_config.autoscaling_config is not None
            and deployment_config.max_concurrent_queries
            < deployment_config.autoscaling_config.target_num_ongoing_requests_per_replica  # noqa: E501
        ):
            logger.warning("Autoscaling will never happen, "
                           "because 'max_concurrent_queries' is less than "
                           "'target_num_ongoing_requests_per_replica' now.")

        goal_id, updating = ray.get(
            self._controller.deploy.remote(name,
                                           deployment_config.to_proto_bytes(),
                                           replica_config, version,
                                           prev_version, route_prefix,
                                           ray.get_runtime_context().job_id))

        tag = f"component=serve deployment={name}"

        if updating:
            msg = f"Updating deployment '{name}'"
            if version is not None:
                msg += f" to version '{version}'"
            logger.info(f"{msg}. {tag}")
        else:
            logger.info(f"Deployment '{name}' is already at version "
                        f"'{version}', not updating. {tag}")

        if _blocking:
            self._wait_for_goal(goal_id)

            if url is not None:
                url_part = f" at `{url}`"
            else:
                url_part = ""
            logger.info(
                f"Deployment '{name}{':'+version if version else ''}' is ready"
                f"{url_part}. {tag}")
        else:
            return goal_id
Example #20
def deployment(
    _func_or_class: Optional[Callable] = None,
    name: Optional[str] = None,
    version: Optional[str] = None,
    num_replicas: Optional[int] = None,
    init_args: Optional[Tuple[Any]] = None,
    init_kwargs: Optional[Dict[Any, Any]] = None,
    route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE,
    ray_actor_options: Optional[Dict] = None,
    user_config: Optional[Any] = None,
    max_concurrent_queries: Optional[int] = None,
    _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
    _graceful_shutdown_wait_loop_s: Optional[float] = None,
    _graceful_shutdown_timeout_s: Optional[float] = None,
    _health_check_period_s: Optional[float] = None,
    _health_check_timeout_s: Optional[float] = None,
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Args:
        name (Optional[str]): Globally-unique name identifying this deployment.
            If not provided, the name of the class or function will be used.
        version (Optional[str]): Version of the deployment. This is used to
            indicate a code change for the deployment; when it is re-deployed
            with a version change, a rolling update of the replicas will be
            performed. If not provided, every deployment will be treated as a
            new version.
        num_replicas (Optional[int]): The number of processes to start up that
            will handle requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Positional args to be passed to the class
            constructor when starting up deployment replicas. These can also be
            passed when you call `.deploy()` on the returned Deployment.
        init_kwargs (Optional[Dict]): Keyword args to be passed to the class
            constructor when starting up deployment replicas. These can also be
            passed when you call `.deploy()` on the returned Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP path
            prefix will be routed to this deployment. Defaults to '/{name}'.
            When set to 'None', no HTTP endpoint will be created.
            Routing is done based on longest-prefix match, so if you have
            deployment A with a prefix of '/a' and deployment B with a prefix
            of '/a/b', requests to '/a', '/a/', and '/a/c' go to A and requests
            to '/a/b', '/a/b/', and '/a/b/c' go to B. Routes must not end with
            a '/' unless they're the root (just '/'), which acts as a
            catch-all.
        ray_actor_options: Options to be passed to the Ray actor
            constructor such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable to
            keep track of updates, so it must only contain hashable types, or
            hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of queries
            that will be sent to a replica of this deployment without receiving
            a response. Defaults to 100.

    Example:
    >>> from ray import serve
    >>> @serve.deployment(name="deployment1", version="v1") # doctest: +SKIP
    ... class MyDeployment: # doctest: +SKIP
    ...     pass # doctest: +SKIP

    >>> MyDeployment.deploy(*init_args) # doctest: +SKIP
    >>> MyDeployment.options( # doctest: +SKIP
    ...     num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment
    """

    # Num of replicas should not be 0.
    # TODO(Sihan): separate the num_replicas attribute between internal and API
    if num_replicas == 0:
        raise ValueError("num_replicas is expected to be larger than 0")

    if num_replicas is not None and _autoscaling_config is not None:
        raise ValueError("Manually setting num_replicas is not allowed when "
                         "_autoscaling_config is provided.")

    config = DeploymentConfig.from_default(
        ignore_none=True,
        num_replicas=num_replicas,
        user_config=user_config,
        max_concurrent_queries=max_concurrent_queries,
        autoscaling_config=_autoscaling_config,
        graceful_shutdown_wait_loop_s=_graceful_shutdown_wait_loop_s,
        graceful_shutdown_timeout_s=_graceful_shutdown_timeout_s,
        health_check_period_s=_health_check_period_s,
        health_check_timeout_s=_health_check_timeout_s,
    )

    def decorator(_func_or_class):
        return Deployment(
            _func_or_class,
            name if name is not None else _func_or_class.__name__,
            config,
            version=version,
            init_args=init_args,
            init_kwargs=init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    return decorator(_func_or_class) if callable(_func_or_class) else decorator
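A hypothetical sketch of the route_prefix=None behavior described in the docstring above: no HTTP endpoint is created, so the deployment is reachable only through a handle (get_handle() is assumed here for illustration):

@serve.deployment(name="worker", route_prefix=None)
class Worker:
    def __call__(self, request):
        return "handled"

# No HTTP route is registered (see the delete_endpoint branch in Example #10);
# the deployment is called through a handle instead.
Worker.deploy()
handle = Worker.get_handle()  # assumed accessor, for illustration only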
Example #21
    def __init__(
        self,
        # For serve structured deployment, deployment body can be import path
        # to the class or function instead.
        func_or_class: Union[Callable, str],
        deployment_name: str,
        deployment_init_args: Tuple[Any],
        deployment_init_kwargs: Dict[str, Any],
        ray_actor_options: Dict[str, Any],
        other_args_to_resolve: Optional[Dict[str, Any]] = None,
    ):
        # Assign instance variables in base class constructor.
        super().__init__(
            deployment_init_args,
            deployment_init_kwargs,
            ray_actor_options,
            other_args_to_resolve=other_args_to_resolve,
        )
        # A Deployment can be passed into other DAGNodes as an init arg. This
        # is a supported pattern in the Ray DAG API: users can instantiate a
        # class and pass the instance as an init arg to other nodes.

        # However, in Ray Serve init args are sent via .remote(), which
        # requires pickling, and DAGNode types are not picklable by design.

        # Thus we need to convert every DeploymentNode used in init args into
        # a deployment handle (executable and picklable) so that the Serve DAG
        # is executable end to end.
        # TODO(jiaodong): This part does some magic for DAGDriver and can throw
        # a confusing pickle replace-table error. Move this out.
        def replace_with_handle(node):
            if isinstance(node, DeploymentNode):
                return RayServeLazySyncHandle(node._deployment.name)
            elif isinstance(node, DeploymentExecutorNode):
                return node._deployment_handle

        (
            replaced_deployment_init_args,
            replaced_deployment_init_kwargs,
        ) = self.apply_functional(
            [deployment_init_args, deployment_init_kwargs],
            predictate_fn=lambda node: isinstance(
                node,
                # We need to match and replace all DAGNode types, even though
                # some may resolve to None, so that no replacement runs into an
                # already-resolved child DAGNode (which would raise a KeyError).
                (
                    DeploymentNode,
                    DeploymentMethodNode,
                    DeploymentFunctionNode,
                    DeploymentExecutorNode,
                    DeploymentFunctionExecutorNode,
                    DeploymentMethodExecutorNode,
                ),
            ),
            apply_fn=replace_with_handle,
        )

        if "deployment_schema" in self._bound_other_args_to_resolve:
            deployment_schema: DeploymentSchema = self._bound_other_args_to_resolve[
                "deployment_schema"
            ]
            deployment_shell = schema_to_deployment(deployment_schema)

            # Prefer user specified name to override the generated one.
            if (
                inspect.isclass(func_or_class)
                and deployment_shell.name != func_or_class.__name__
            ):
                deployment_name = deployment_shell.name

            # Set the route prefix: prefer the one the user supplied,
            # otherwise default to /{deployment_name}.
            if (
                deployment_shell.route_prefix is None
                or deployment_shell.route_prefix != f"/{deployment_shell.name}"
            ):
                route_prefix = deployment_shell.route_prefix
            else:
                route_prefix = f"/{deployment_name}"

            self._deployment = deployment_shell.options(
                func_or_class=func_or_class,
                name=deployment_name,
                init_args=replaced_deployment_init_args,
                init_kwargs=replaced_deployment_init_kwargs,
                route_prefix=route_prefix,
            )
        else:
            self._deployment: Deployment = Deployment(
                func_or_class,
                deployment_name,
                # TODO: (jiaodong) Support deployment config from user input
                DeploymentConfig(),
                init_args=replaced_deployment_init_args,
                init_kwargs=replaced_deployment_init_kwargs,
                ray_actor_options=ray_actor_options,
                _internal=True,
            )
        self._deployment_handle = RayServeLazySyncHandle(self._deployment.name)