Example #1
    def delete(self, tgt: str) -> None:
        """
        Delete the stored data from persistent storage.
        """
        storage_dir = os.path.join(self._base_path, tgt)

        if not os.path.exists(storage_dir):
            raise errors.CheckpointNotFound(
                f"Storage directory does not exist: {storage_dir}")
        if not os.path.isdir(storage_dir):
            raise errors.CheckpointNotFound(
                f"Storage path is not a directory: {storage_dir}")
        shutil.rmtree(storage_dir, ignore_errors=False)
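For context, here is a minimal standalone sketch of the same delete-with-validation pattern. The function name, the throwaway base path, and the use of FileNotFoundError in place of errors.CheckpointNotFound are illustrative assumptions, not part of the original class.

import os
import shutil
import tempfile

def delete_checkpoint(base_path: str, tgt: str) -> None:
    # Same pattern as above: resolve the directory, validate it, then remove it
    # recursively. os.path.isdir covers both "missing" and "not a directory".
    storage_dir = os.path.join(base_path, tgt)
    if not os.path.isdir(storage_dir):
        raise FileNotFoundError(f"Storage directory does not exist: {storage_dir}")
    shutil.rmtree(storage_dir)

# Quick self-contained check against a temporary base path.
base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, "ckpt-123"))
delete_checkpoint(base, "ckpt-123")
assert not os.path.exists(os.path.join(base, "ckpt-123"))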
Example #2
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        path = self.get_storage_prefix(src)
        logging.info(f"Downloading {path} from GCS")
        found = False
        # Listing blobs with a prefix and no delimiter is equivalent to a recursive listing. If
        # you pass `delimiter="/"`, you will get only the file-like blobs directly inside a
        # directory-like blob.
        for blob in self.bucket.list_blobs(prefix=path):
            found = True
            _dst = os.path.join(dst, os.path.relpath(blob.name, path))
            dst_dir = os.path.dirname(_dst)
            os.makedirs(dst_dir, exist_ok=True)

            # Only create an empty directory for keys that end with "/".
            # See the `upload` method for more context.
            if blob.name.endswith("/"):
                os.makedirs(_dst, exist_ok=True)
                continue

            logging.debug(f"Downloading from GCS: {blob.name}")

            blob.download_to_filename(_dst)

        if not found:
            raise errors.CheckpointNotFound(f"Did not find checkpoint {path} in GCS")
Example #3
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        try:
            shutil.copytree(os.path.join(self._base_path, src), dst)
        except FileNotFoundError:
            # `from None` suppresses exception chaining; see the sketch below.
            raise errors.CheckpointNotFound(
                f"Did not find checkpoint {src} in shared_fs storage"
            ) from None
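The `from None` means the caller sees only CheckpointNotFound, not a "During handling of the above exception, another exception occurred" traceback for the underlying FileNotFoundError. A tiny self-contained sketch of the pattern (exception class and paths are made up):

class CheckpointNotFound(Exception):
    pass

def load(path: str) -> bytes:
    try:
        with open(path, "rb") as f:
            return f.read()
    except FileNotFoundError:
        # "from None" hides the original FileNotFoundError from the traceback.
        raise CheckpointNotFound(f"Did not find checkpoint at {path}") from None

try:
    load("/nonexistent/checkpoint")
except CheckpointNotFound as exc:
    print(exc)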
Example #4
    def restore_path(self, src: str) -> Iterator[pathlib.Path]:
        """
        Prepare a local directory exposing the checkpoint. Do some simple checks to make sure
        the configuration seems reasonable.
        """
        check.true(
            os.path.exists(self._base_path),
            f"Storage directory does not exist: {self._base_path}. Please verify that you are "
            "using the correct configuration value for checkpoint_storage.host_path",
        )
        storage_dir = os.path.join(self._base_path, src)
        if not os.path.exists(storage_dir):
            raise errors.CheckpointNotFound(
                f"Did not find checkpoint {src} in shared_fs storage")
        yield pathlib.Path(storage_dir)
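Because restore_path yields a pathlib.Path and is typed Iterator[pathlib.Path], it is presumably meant to be used as a context manager (e.g. via contextlib.contextmanager); the decorator is not shown in the snippet, so the following self-contained sketch is an assumption that mirrors the method with simplified names and error types:

import contextlib
import os
import pathlib
import tempfile
from typing import Iterator

@contextlib.contextmanager
def restore_path_demo(base_path: str, src: str) -> Iterator[pathlib.Path]:
    # Mirrors the method above: validate the directory, then hand the caller a
    # Path for the duration of the with-block.
    storage_dir = os.path.join(base_path, src)
    if not os.path.isdir(storage_dir):
        raise FileNotFoundError(f"Did not find checkpoint {src}")
    yield pathlib.Path(storage_dir)

base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, "ckpt-abc"))
with restore_path_demo(base, "ckpt-abc") as ckpt_dir:
    print(sorted(p.name for p in ckpt_dir.iterdir()))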
Example #5
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        logging.info(f"Downloading {src} from Azure Blob Storage")
        found = False
        for blob in self.client.list_files(self.container, file_prefix=src):
            found = True
            _dst = os.path.join(dst, os.path.relpath(blob, src))
            dst_dir = os.path.dirname(_dst)
            os.makedirs(dst_dir, exist_ok=True)

            # Only create an empty directory for keys that end with "/".
            if blob.endswith("/"):
                os.makedirs(_dst, exist_ok=True)
                continue

            # Use posixpath so that we always use forward slashes, even on Windows.
            container_blob = posixpath.join(self.container, blob)
            blob_dir, blob_base = posixpath.split(container_blob)
            self.client.get(blob_dir, blob_base, _dst)

        if not found:
            raise errors.CheckpointNotFound(f"Did not find checkpoint {src} in Azure Blob Storage")
Example #6
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        prefix = self.get_storage_prefix(src)
        logging.info(f"Downloading {prefix} from S3")
        found = False
        for obj in self.bucket.objects.filter(Prefix=prefix):
            found = True
            _dst = os.path.join(dst, os.path.relpath(obj.key, prefix))
            dst_dir = os.path.dirname(_dst)
            os.makedirs(dst_dir, exist_ok=True)

            logging.debug(f"Downloading s3://{self.bucket_name}/{obj.key} to {_dst}")

            # Only create empty directory for keys that end with "/".
            # See `upload` method for more context.
            if obj.key.endswith("/"):
                os.makedirs(_dst, exist_ok=True)
                continue

            self.bucket.download_file(obj.key, _dst)

        if not found:
            raise errors.CheckpointNotFound(f"Did not find {prefix} in S3")