Exemple #1
0
    async def deploy(self, name: str, backend_config: BackendConfig,
                     replica_config: ReplicaConfig, version: Optional[str],
                     route_prefix: Optional[str]) -> Optional[GoalId]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        python_methods = []
        if inspect.isclass(replica_config.backend_def):
            for method_name, _ in inspect.getmembers(
                    replica_config.backend_def, inspect.isfunction):
                python_methods.append(method_name)

        async with self.write_lock:
            backend_info = BackendInfo(worker_class=create_backend_replica(
                replica_config.backend_def),
                                       version=version,
                                       backend_config=backend_config,
                                       replica_config=replica_config)

            goal_id = self.backend_state.deploy_backend(name, backend_info)
            endpoint_info = EndpointInfo(ALL_HTTP_METHODS,
                                         route=route_prefix,
                                         python_methods=python_methods)
            self.endpoint_state.update_endpoint(name, endpoint_info,
                                                TrafficPolicy({name: 1.0}))
            return goal_id
Exemple #2
0
    def create_backend(self, backend_tag: BackendTag,
                       backend_config: BackendConfig,
                       replica_config: ReplicaConfig) -> Optional[GoalId]:
        # Ensures this method is idempotent.
        backend_info = self._backend_metadata.get(backend_tag)
        if backend_info is not None:
            if (backend_info.backend_config == backend_config
                    and backend_info.replica_config == replica_config):
                return None

        backend_replica_class = create_backend_replica(
            replica_config.func_or_class)

        # Save creator that starts replicas, the arguments to be passed in,
        # and the configuration for the backends.
        backend_info = BackendInfo(worker_class=backend_replica_class,
                                   backend_config=backend_config,
                                   replica_config=replica_config)

        new_goal_id, existing_goal_id = self._set_backend_goal(
            backend_tag, backend_info)

        # NOTE(edoakes): we must write a checkpoint before starting new
        # or pushing the updated config to avoid inconsistent state if we
        # crash while making the change.
        self._checkpoint()
        self._notify_backend_configs_changed()

        if existing_goal_id is not None:
            self._goal_manager.complete_goal(existing_goal_id)
        return new_goal_id
Exemple #3
0
    async def deploy(
            self, name: str, backend_config: BackendConfig,
            replica_config: ReplicaConfig, python_methods: List[str],
            version: Optional[str],
            route_prefix: Optional[str]) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        async with self.write_lock:
            backend_info = BackendInfo(actor_def=ray.remote(
                create_backend_replica(name,
                                       replica_config.serialized_backend_def)),
                                       version=version,
                                       backend_config=backend_config,
                                       replica_config=replica_config)

            goal_id, updating = self.backend_state.deploy_backend(
                name, backend_info)
            endpoint_info = EndpointInfo(ALL_HTTP_METHODS,
                                         route=route_prefix,
                                         python_methods=python_methods,
                                         legacy=False)
            self.endpoint_state.update_endpoint(name, endpoint_info,
                                                TrafficPolicy({name: 1.0}))
            return goal_id, updating
Exemple #4
0
    async def create_backend(self, backend_tag: BackendTag,
                             backend_config: BackendConfig,
                             replica_config: ReplicaConfig) -> UUID:
        """Register a new backend under the specified tag."""
        async with self.write_lock:
            # Ensures this method is idempotent.
            backend_info = self.backend_state.get_backend(backend_tag)
            if backend_info is not None:
                if (backend_info.backend_config == backend_config
                        and backend_info.replica_config == replica_config):
                    return

            backend_replica = create_backend_replica(
                replica_config.func_or_class)

            # Save creator that starts replicas, the arguments to be passed in,
            # and the configuration for the backends.
            backend_info = BackendInfo(
                worker_class=backend_replica,
                backend_config=backend_config,
                replica_config=replica_config)
            self.backend_state.add_backend(backend_tag, backend_info)
            metadata = backend_config.internal_metadata
            if metadata.autoscaling_config is not None:
                self.autoscaling_policies[
                    backend_tag] = BasicAutoscalingPolicy(
                        backend_tag, metadata.autoscaling_config)

            try:
                # This call should be to run control loop
                self.actor_reconciler._scale_backend_replicas(
                    self.backend_state.backends, backend_tag,
                    backend_config.num_replicas)
            except RayServeException as e:
                del self.backend_state.backends[backend_tag]
                raise e

            return_uuid = self._create_event_with_result({
                backend_tag: backend_info
            })
            # NOTE(edoakes): we must write a checkpoint before starting new
            # or pushing the updated config to avoid inconsistent state if we
            # crash while making the change.
            self._checkpoint()
            await self.actor_reconciler._enqueue_pending_scale_changes_loop(
                self.backend_state)
            await self.actor_reconciler.backend_control_loop()

            self.notify_replica_handles_changed()

            # Set the backend config inside routers
            # (particularly for max_concurrent_queries).
            self.notify_backend_configs_changed()
            return return_uuid
Exemple #5
0
 async def create_backend(
         self, backend_tag: BackendTag, backend_config: BackendConfig,
         replica_config: ReplicaConfig) -> Optional[GoalId]:
     """Register a new backend under the specified tag."""
     async with self.write_lock:
         backend_info = BackendInfo(worker_class=create_backend_replica(
             replica_config.backend_def),
                                    version=RESERVED_VERSION_TAG,
                                    backend_config=backend_config,
                                    replica_config=replica_config)
         return self.backend_state.deploy_backend(backend_tag, backend_info)
Exemple #6
0
    async def create_backend(self, backend_tag: BackendTag,
                             backend_config: BackendConfig,
                             replica_config: ReplicaConfig) -> None:
        """Register a new backend under the specified tag."""
        async with self.write_lock:
            # Ensures this method is idempotent.
            backend_info = self.configuration_store.get_backend(backend_tag)
            if backend_info is not None:
                if (backend_info.backend_config == backend_config
                        and backend_info.replica_config == replica_config):
                    return

            backend_replica = create_backend_replica(
                replica_config.func_or_class)

            # Save creator that starts replicas, the arguments to be passed in,
            # and the configuration for the backends.
            self.configuration_store.add_backend(
                backend_tag,
                BackendInfo(
                    worker_class=backend_replica,
                    backend_config=backend_config,
                    replica_config=replica_config))
            metadata = backend_config.internal_metadata
            if metadata.autoscaling_config is not None:
                self.autoscaling_policies[
                    backend_tag] = BasicAutoscalingPolicy(
                        backend_tag, metadata.autoscaling_config)

            try:
                self.actor_reconciler._scale_backend_replicas(
                    self.configuration_store.backends, backend_tag,
                    backend_config.num_replicas)
            except RayServeException as e:
                del self.configuration_store.backends[backend_tag]
                raise e

            # NOTE(edoakes): we must write a checkpoint before starting new
            # or pushing the updated config to avoid inconsistent state if we
            # crash while making the change.
            self._checkpoint()
            await self.actor_reconciler._start_pending_backend_replicas(
                self.configuration_store)

            # Set the backend config inside the router
            # (particularly for max-batch-size).
            await asyncio.gather(*[
                router.set_backend_config.remote(backend_tag, backend_config)
                for router in self.actor_reconciler.router_handles()
            ])
            await self.broadcast_backend_config(backend_tag)
Exemple #7
0
    async def deploy(self, name: str, backend_config: BackendConfig,
                     replica_config: ReplicaConfig,
                     version: Optional[str]) -> Optional[GoalId]:
        # By default the path prefix is the deployment name.
        if replica_config.path_prefix is None:
            replica_config.path_prefix = f"/{name}"
            # Backend config should be synchronized so the backend worker
            # is aware of it.
            backend_config.internal_metadata.path_prefix = f"/{name}"
        else:
            if ("{" in replica_config.path_prefix
                    or "}" in replica_config.path_prefix):
                raise ValueError(
                    "Wildcard routes are not supported for deployment paths. "
                    "Please use @serve.ingress with FastAPI instead.")

        if replica_config.is_asgi_app:
            # When the backend is asgi application, we want to proxy it
            # with a prefixed path as well as proxy all HTTP methods.
            # {wildcard:path} is used so HTTPProxy's Starlette router can match
            # arbitrary path.
            path_prefix = replica_config.path_prefix
            if path_prefix.endswith("/"):
                path_prefix = path_prefix[:-1]
            http_route = path_prefix + WILDCARD_PATH_SUFFIX
            http_methods = ALL_HTTP_METHODS
        else:
            http_route = replica_config.path_prefix
            # Generic endpoint should support a limited subset of HTTP methods.
            http_methods = ["GET", "POST"]

        python_methods = []
        if inspect.isclass(replica_config.backend_def):
            for method_name, _ in inspect.getmembers(
                    replica_config.backend_def, inspect.isfunction):
                python_methods.append(method_name)

        async with self.write_lock:
            backend_info = BackendInfo(worker_class=create_backend_replica(
                replica_config.backend_def),
                                       version=version,
                                       backend_config=backend_config,
                                       replica_config=replica_config)

            goal_id = self.backend_state.deploy_backend(name, backend_info)
            self.endpoint_state.create_endpoint(name,
                                                http_route,
                                                http_methods,
                                                TrafficPolicy({name: 1.0}),
                                                python_methods=python_methods)
            return goal_id
Exemple #8
0
    async def deploy(self, name: str, backend_config: BackendConfig,
                     replica_config: ReplicaConfig,
                     version: Optional[str]) -> Optional[GoalId]:
        # By default the path prefix is the deployment name.
        if replica_config.path_prefix is None:
            replica_config.path_prefix = f"/{name}"
            # Backend config should be synchronized so the backend worker
            # is aware of it.
            backend_config.internal_metadata.path_prefix = f"/{name}"

        if replica_config.is_asgi_app:
            # When the backend is asgi application, we want to proxy it
            # with a prefixed path as well as proxy all HTTP methods.
            # {wildcard:path} is used so HTTPProxy's Starlette router can match
            # arbitrary path.
            http_route = f"{replica_config.path_prefix}" + "/{wildcard:path}"
            # https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
            http_methods = [
                "GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS",
                "TRACE", "PATCH"
            ]
        else:
            http_route = replica_config.path_prefix
            # Generic endpoint should support a limited subset of HTTP methods.
            http_methods = ["GET", "POST"]

        python_methods = []
        if inspect.isclass(replica_config.backend_def):
            for method_name, _ in inspect.getmembers(
                    replica_config.backend_def, inspect.isfunction):
                python_methods.append(method_name)

        async with self.write_lock:
            backend_info = BackendInfo(
                worker_class=create_backend_replica(
                    replica_config.backend_def),
                version=version,
                backend_config=backend_config,
                replica_config=replica_config)

            goal_id = self.backend_state.deploy_backend(name, backend_info)
            self.endpoint_state.create_endpoint(
                name,
                http_route,
                http_methods,
                TrafficPolicy({
                    name: 1.0
                }),
                python_methods=python_methods)
            return goal_id
Exemple #9
0
 async def create_backend(
         self, backend_tag: BackendTag, backend_config: BackendConfig,
         replica_config: ReplicaConfig) -> Optional[GoalId]:
     """Register a new backend under the specified tag."""
     async with self.write_lock:
         backend_info = BackendInfo(actor_def=ray.remote(
             create_backend_replica(backend_tag,
                                    replica_config.serialized_backend_def)),
                                    version=RESERVED_VERSION_TAG,
                                    backend_config=backend_config,
                                    replica_config=replica_config)
         goal_id, _ = self.backend_state.deploy_backend(
             backend_tag, backend_info)
         return goal_id
Exemple #10
0
    async def deploy(self,
                     name: str,
                     backend_config: BackendConfig,
                     replica_config: ReplicaConfig,
                     python_methods: List[str],
                     version: Optional[str],
                     prev_version: Optional[str],
                     route_prefix: Optional[str],
                     deployer_job_id: "Optional[ray._raylet.JobID]" = None
                     ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        async with self.write_lock:
            if prev_version is not None:
                existing_backend_info = self.backend_state.get_backend(name)
                if (existing_backend_info is None
                        or not existing_backend_info.version):
                    raise ValueError(
                        f"prev_version '{prev_version}' is specified but "
                        "there is no existing deployment.")
                if existing_backend_info.version != prev_version:
                    raise ValueError(
                        f"prev_version '{prev_version}' "
                        "does not match with the existing "
                        f"version '{existing_backend_info.version}'.")

            backend_info = BackendInfo(
                actor_def=ray.remote(
                    create_backend_replica(
                        name, replica_config.serialized_backend_def)),
                version=version,
                backend_config=backend_config,
                replica_config=replica_config,
                deployer_job_id=deployer_job_id,
                start_time_ms=int(time.time() * 1000))

            goal_id, updating = self.backend_state.deploy_backend(
                name, backend_info)
            endpoint_info = EndpointInfo(
                ALL_HTTP_METHODS,
                route=route_prefix,
                python_methods=python_methods,
                legacy=False)
            self.endpoint_state.update_endpoint(name, endpoint_info,
                                                TrafficPolicy({
                                                    name: 1.0
                                                }))
            return goal_id, updating
Exemple #11
0
    def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
    ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        backend_config = BackendConfig.from_proto_bytes(
            backend_config_proto_bytes)

        if prev_version is not None:
            existing_backend_info = self.backend_state_manager.get_backend(
                name)
            if (existing_backend_info is None
                    or not existing_backend_info.version):
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing_backend_info.version != prev_version:
                raise ValueError(f"prev_version '{prev_version}' "
                                 "does not match with the existing "
                                 f"version '{existing_backend_info.version}'.")
        backend_info = BackendInfo(actor_def=ray.remote(
            create_backend_replica(name,
                                   replica_config.serialized_backend_def)),
                                   version=version,
                                   backend_config=backend_config,
                                   replica_config=replica_config,
                                   deployer_job_id=deployer_job_id,
                                   start_time_ms=int(time.time() * 1000))
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # This is probably not the desired behavior for an autoscaling
        # deployment, which redeploys very often to change num_replicas.

        goal_id, updating = self.backend_state_manager.deploy_backend(
            name, backend_info)
        endpoint_info = EndpointInfo(route=route_prefix)
        self.endpoint_state.update_endpoint(name, endpoint_info)
        return goal_id, updating
Exemple #12
0
    def deploy_backend(self,
                       backend_tag: BackendTag,
                       backend_config: BackendConfig,
                       replica_config: ReplicaConfig,
                       version: Optional[str] = None) -> Optional[GoalId]:
        # Ensures this method is idempotent.
        backend_info = self._backend_metadata.get(backend_tag)
        if backend_info is not None:
            # Old codepath.
            if version is None:
                if (backend_info.backend_config == backend_config
                        and backend_info.replica_config == replica_config):
                    return self._backend_goals.get(backend_tag, None)
            # New codepath: treat version as ground truth for implementation.
            else:
                if (backend_info.backend_config == backend_config
                        and self._target_versions[backend_tag] == version):
                    return self._backend_goals.get(backend_tag, None)

        if backend_tag not in self._replicas:
            self._replicas[backend_tag] = ReplicaStateContainer()

        backend_replica_class = create_backend_replica(
            replica_config.backend_def)

        # Save creator that starts replicas, the arguments to be passed in,
        # and the configuration for the backends.
        backend_info = BackendInfo(worker_class=backend_replica_class,
                                   backend_config=backend_config,
                                   replica_config=replica_config)

        new_goal_id, existing_goal_id = self._set_backend_goal(
            backend_tag, backend_info, version)

        # NOTE(edoakes): we must write a checkpoint before starting new
        # or pushing the updated config to avoid inconsistent state if we
        # crash while making the change.
        self._checkpoint()
        self._notify_backend_configs_changed(backend_tag)

        if existing_goal_id is not None:
            self._goal_manager.complete_goal(existing_goal_id)
        return new_goal_id
Exemple #13
0
    async def create_backend(self, backend_tag: BackendTag,
                             backend_config: BackendConfig,
                             replica_config: ReplicaConfig) -> UUID:
        """Register a new backend under the specified tag."""
        async with self.write_lock:
            # Ensures this method is idempotent.
            backend_info = self.backend_state.get_backend(backend_tag)
            if backend_info is not None:
                if (backend_info.backend_config == backend_config
                        and backend_info.replica_config == replica_config):
                    return

            backend_replica = create_backend_replica(
                replica_config.func_or_class)

            # Save creator that starts replicas, the arguments to be passed in,
            # and the configuration for the backends.
            backend_info = BackendInfo(
                worker_class=backend_replica,
                backend_config=backend_config,
                replica_config=replica_config)

            return_uuid = self._create_event_with_result({
                backend_tag: backend_info
            })

            await self.set_backend_goal(backend_tag, backend_info, return_uuid)

            try:
                # This call should be to run control loop
                self.backend_state.scale_backend_replicas(
                    backend_tag, backend_config.num_replicas)
            except RayServeException as e:
                del self.backend_state.backends[backend_tag]
                raise e

            # NOTE(edoakes): we must write a checkpoint before starting new
            # or pushing the updated config to avoid inconsistent state if we
            # crash while making the change.
            self._checkpoint()
            self.notify_backend_configs_changed()
            return return_uuid
Exemple #14
0
 async def create_backend(
     self,
     backend_tag: BackendTag,
     backend_config: BackendConfig,
     replica_config: ReplicaConfig,
     deployer_job_id: Optional["Optional[ray._raylet.JobID]"] = None
 ) -> Optional[GoalId]:
     """Register a new backend under the specified tag."""
     async with self.write_lock:
         backend_info = BackendInfo(actor_def=ray.remote(
             create_backend_replica(backend_tag,
                                    replica_config.serialized_backend_def)),
                                    version=RESERVED_VERSION_TAG,
                                    backend_config=backend_config,
                                    replica_config=replica_config,
                                    start_time_ms=int(time.time() * 1000),
                                    deployer_job_id=deployer_job_id)
         goal_id, _ = self.backend_state.deploy_backend(
             backend_tag, backend_info)
         return goal_id
Exemple #15
0
    async def deploy(self, name: str, backend_config: BackendConfig,
                     replica_config: ReplicaConfig, version: Optional[str],
                     route_prefix: Optional[str]) -> Optional[GoalId]:
        if route_prefix is None:
            route_prefix = f"/{name}"

        if replica_config.is_asgi_app:
            # When the backend is asgi application, we want to proxy it
            # with a prefixed path as well as proxy all HTTP methods.
            # {wildcard:path} is used so HTTPProxy's Starlette router can match
            # arbitrary path.
            if route_prefix.endswith("/"):
                route_prefix = route_prefix[:-1]
            http_route = route_prefix + WILDCARD_PATH_SUFFIX
            http_methods = ALL_HTTP_METHODS
        else:
            http_route = route_prefix
            # Generic endpoint should support a limited subset of HTTP methods.
            http_methods = ["GET", "POST"]

        python_methods = []
        if inspect.isclass(replica_config.backend_def):
            for method_name, _ in inspect.getmembers(
                    replica_config.backend_def, inspect.isfunction):
                python_methods.append(method_name)

        async with self.write_lock:
            backend_info = BackendInfo(worker_class=create_backend_replica(
                replica_config.backend_def),
                                       version=version,
                                       backend_config=backend_config,
                                       replica_config=replica_config)

            goal_id = self.backend_state.deploy_backend(name, backend_info)
            self.endpoint_state.update_endpoint(name,
                                                http_route,
                                                http_methods,
                                                TrafficPolicy({name: 1.0}),
                                                python_methods=python_methods)
            return goal_id
Exemple #16
0
    async def deploy(self, name: str, backend_config: BackendConfig,
                     replica_config: ReplicaConfig, version: Optional[str],
                     route_prefix: Optional[str]) -> Optional[GoalId]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        if replica_config.is_asgi_app:
            # When the backend is asgi application, we want to proxy it
            # with a prefixed path as well as proxy all HTTP methods.
            http_methods = ALL_HTTP_METHODS
        else:
            # Generic endpoint should support a limited subset of HTTP methods.
            http_methods = ["GET", "POST"]

        python_methods = []
        if inspect.isclass(replica_config.backend_def):
            for method_name, _ in inspect.getmembers(
                    replica_config.backend_def, inspect.isfunction):
                python_methods.append(method_name)

        async with self.write_lock:
            backend_info = BackendInfo(
                worker_class=create_backend_replica(
                    replica_config.backend_def),
                version=version,
                backend_config=backend_config,
                replica_config=replica_config)

            goal_id = self.backend_state.deploy_backend(name, backend_info)
            endpoint_info = EndpointInfo(
                http_methods,
                route=route_prefix,
                python_methods=python_methods)
            self.endpoint_state.update_endpoint(name, endpoint_info,
                                                TrafficPolicy({
                                                    name: 1.0
                                                }))
            return goal_id
Exemple #17
0
 async def __init__(self):
     self.worker = object.__new__(create_backend_replica(backend_def))
     await self.worker.__init__(name, name + ":tag", init_args,
                                backend_config, controller_name)
Exemple #18
0
 def __init__(self):
     self.worker = create_backend_replica(func_or_class)(
         name, name + ":tag", init_args, backend_config,
         controller_name)