Ejemplo n.º 1
0
 def _check_if_container_restart_is_needed(
     self, image: str, cleaned_bind_mounts: Dict[str, str]
 ) -> bool:
     """Report whether the running container lacks any requested bind mount.

     Two commands are run on the host: one built by ``check_docker_image``
     and one built by ``check_bind_mounts_cmd``. An image mismatch is only
     logged as an error; it does not influence the return value.

     Args:
         image: Image name the container is expected to be running.
         cleaned_bind_mounts: Requested mounts keyed by remote path.
             Presumably the Ray bootstrap files were already removed by
             the caller -- confirm against the call site.

     Returns:
         True when at least one requested remote path is absent from the
         container's active mount destinations. False otherwise, including
         when the mount listing cannot be parsed as JSON.
     """
     image_query = check_docker_image(self.container_name, self.docker_cmd)
     image_output = self.run(image_query, with_output=True, run_env="host")
     running_image = image_output.decode("utf-8").strip()
     if running_image != image:
         cli_logger.error(
             "A container with name {} is running image {} instead "
             "of {} (which was provided in the YAML)",
             self.container_name,
             running_image,
             image,
         )

     mounts_query = check_bind_mounts_cmd(self.container_name, self.docker_cmd)
     mounts_output = self.run(mounts_query, with_output=True, run_env="host")
     mounts_json = mounts_output.decode("utf-8").strip()

     try:
         # Leading/trailing slashes are stripped on both sides so that
         # "/data/" and "/data" compare equal.
         mounted = set()
         for entry in json.loads(mounts_json):
             mounted.add(entry["Destination"].strip("/"))

         wanted = set()
         for remote_path in cleaned_bind_mounts:
             wanted.add(self._docker_expand_user(remote_path).strip("/"))

         missing = wanted - mounted
         if not missing:
             return False

         cli_logger.warning(
             "This Docker Container is already running. "
             "Restarting the Docker container on "
             "this node to pick up the following file_mounts {}",
             missing,
         )
         return True
     except json.JSONDecodeError:
         # Best effort: an unparsable mount listing never forces a restart.
         cli_logger.verbose(
             "Unable to check if file_mounts specified in the YAML "
             "differ from those on the running container."
         )
         return False
Ejemplo n.º 2
0
    def run_init(self, *, as_head, file_mounts):
        """Start this node's Docker container, or sanity-check the running one.

        Steps, in order:
          1. Resolve the image: ``<head|worker>_image`` from ``docker_config``
             if present, otherwise ``image``.
          2. Verify docker is installed and, when ``pull_before_run`` is
             enabled (the default), pull the image on the host.
          3. If ``_check_container_status()`` is falsy, start a new container;
             otherwise log an error when the running container's image or
             bind mounts do not match the requested ones.
          4. ``docker cp`` the Ray bootstrap files into the container.
          5. Set ``self.initialized`` to True.

        Args:
            as_head: Truthy selects the ``head_*`` overrides in
                ``docker_config``; falsy selects the ``worker_*`` ones.
            file_mounts: Mapping of remote path -> local path. It is not
                mutated; a copy without the bootstrap files is used for
                bind mounts.

        Raises:
            AssertionError: ``pull_before_run`` is enabled but no image is
                configured.
        """
        BOOTSTRAP_MOUNTS = [
            "~/ray_bootstrap_config.yaml", "~/ray_bootstrap_key.pem"
        ]
        node_kind = "head" if as_head else "worker"

        # A node-kind specific image overrides the generic "image" entry.
        image = self.docker_config.get(
            f"{node_kind}_image", self.docker_config.get("image"))

        self._check_docker_installed()
        if self.docker_config.get("pull_before_run", True):
            assert image, "Image must be included in config if " + \
                "pull_before_run is specified"

            self.run("docker pull {}".format(image), run_env="host")

        # Bootstrap files cannot be bind mounted because docker opens the
        # underlying inode. When the file is switched, docker becomes outdated.
        cleaned_bind_mounts = file_mounts.copy()
        for mnt in BOOTSTRAP_MOUNTS:
            cleaned_bind_mounts.pop(mnt, None)

        if not self._check_container_status():
            # Get home directory from the image's environment.
            image_env = self.ssh_command_runner.run(
                "docker inspect -f '{{json .Config.Env}}' " + image,
                with_output=True).decode().strip()
            home_directory = "/root"
            # The inspect output may be the JSON literal `null` when the
            # image defines no environment; treat that as an empty list.
            for env_var in json.loads(image_env) or []:
                if env_var.startswith("HOME="):
                    # Slice instead of split so a value that itself
                    # contains "HOME=" is kept intact.
                    home_directory = env_var[len("HOME="):]
                    break

            run_options = (
                self.docker_config.get("run_options", []) +
                self.docker_config.get(f"{node_kind}_run_options", []) +
                self._configure_runtime() + self._auto_configure_shm())
            start_command = docker_start_cmds(
                self.ssh_command_runner.ssh_user, image, cleaned_bind_mounts,
                self.container_name, run_options,
                self.ssh_command_runner.cluster_name, home_directory)
            self.run(start_command, run_env="host")
        else:
            running_image = self.run(check_docker_image(self.container_name),
                                     with_output=True,
                                     run_env="host").decode("utf-8").strip()
            if running_image != image:
                logger.error(f"A container with name {self.container_name} " +
                             f"is running image {running_image} instead " +
                             f"of {image} (which was provided in the YAML)")
            mounts = self.run(check_bind_mounts_cmd(self.container_name),
                              with_output=True,
                              run_env="host").decode("utf-8").strip()
            try:
                active_mounts = json.loads(mounts)
            except json.JSONDecodeError:
                # Best effort only: an unparsable listing is not fatal.
                cli_logger.verbose(
                    "Unable to check if file_mounts specified in the YAML "
                    "differ from those on the running container.")
            else:
                # Compare with surrounding slashes stripped so "/data/" and
                # "/data" are treated as the same mount point.
                active_remote_mounts = {
                    mnt["Destination"].strip("/")
                    for mnt in active_mounts or []
                }
                # Bootstrap files were already removed from
                # cleaned_bind_mounts above, so they are ignored here.
                for remote, local in cleaned_bind_mounts.items():
                    remote = self._docker_expand_user(remote)
                    if remote.strip("/") not in active_remote_mounts:
                        cli_logger.error(
                            "Please ray stop & restart cluster to "
                            f"allow mount {remote}:{local} to take hold")

        # Explicitly copy in ray bootstrap files.
        for mount in BOOTSTRAP_MOUNTS:
            if mount in file_mounts:
                self.ssh_command_runner.run(
                    "docker cp {src} {container}:{dst}".format(
                        src=os.path.join(
                            self._get_docker_host_mount_location(
                                self.ssh_command_runner.cluster_name), mount),
                        container=self.container_name,
                        dst=self._docker_expand_user(mount)))
        self.initialized = True