def delete(self, tgt: str) -> None:
    """
    Delete the stored data from persistent storage.
    """
    storage_dir = os.path.join(self._base_path, tgt)

    if not os.path.exists(storage_dir):
        raise errors.CheckpointNotFound(f"Storage directory does not exist: {storage_dir}")

    if not os.path.isdir(storage_dir):
        raise errors.CheckpointNotFound(f"Storage path is not a directory: {storage_dir}")

    shutil.rmtree(storage_dir, ignore_errors=False)
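# A standalone, runnable sketch of the same delete pattern outside the storage-manager
# class. The temporary base path and checkpoint id are hypothetical stand-ins, and
# FileNotFoundError stands in for errors.CheckpointNotFound.
import os
import shutil
import tempfile

base_path = tempfile.mkdtemp()            # stands in for checkpoint_storage.host_path
tgt = "abc123"                            # hypothetical checkpoint id
storage_dir = os.path.join(base_path, tgt)
os.makedirs(storage_dir)                  # pretend a checkpoint was written here

if not os.path.exists(storage_dir):
    raise FileNotFoundError(f"Storage directory does not exist: {storage_dir}")
if not os.path.isdir(storage_dir):
    raise FileNotFoundError(f"Storage path is not a directory: {storage_dir}")
# ignore_errors=False makes rmtree raise if any entry cannot be removed.
shutil.rmtree(storage_dir, ignore_errors=False)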
def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
    dst = os.fspath(dst)
    path = self.get_storage_prefix(src)
    logging.info(f"Downloading {path} from GCS")
    found = False
    # Listing blobs with a prefix set and no delimiter is equivalent to a recursive
    # listing. If you include a `delimiter="/"` you will get only the file-like blobs
    # inside of a directory-like blob.
    for blob in self.bucket.list_blobs(prefix=path):
        found = True
        _dst = os.path.join(dst, os.path.relpath(blob.name, path))
        dst_dir = os.path.dirname(_dst)
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir, exist_ok=True)

        # Only create an empty directory for keys that end with "/".
        # See the `upload` method for more context.
        if blob.name.endswith("/"):
            os.makedirs(_dst, exist_ok=True)
            continue

        logging.debug(f"Downloading from GCS: {blob.name}")
        blob.download_to_filename(_dst)

    if not found:
        raise errors.CheckpointNotFound(f"Did not find checkpoint {path} in GCS")
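# A minimal google-cloud-storage sketch of the listing behavior described in the
# comment above. The bucket name and prefix are hypothetical, and the snippet assumes
# credentials are already configured for storage.Client().
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")        # hypothetical bucket name
prefix = "checkpoints/abc123/"             # hypothetical storage prefix

# No delimiter: a recursive listing of every object under the prefix.
for blob in bucket.list_blobs(prefix=prefix):
    print(blob.name)

# delimiter="/": only the file-like blobs directly under the prefix are yielded;
# objects nested under deeper "directory-like" entries are not.
for blob in bucket.list_blobs(prefix=prefix, delimiter="/"):
    print(blob.name)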
def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
    dst = os.fspath(dst)
    try:
        shutil.copytree(os.path.join(self._base_path, src), dst)
    except FileNotFoundError:
        raise errors.CheckpointNotFound(
            f"Did not find checkpoint {src} in shared_fs storage"
        ) from None
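# A runnable sketch of the copy-and-translate-error pattern above. Paths are
# hypothetical temporaries, and RuntimeError stands in for errors.CheckpointNotFound.
import os
import shutil
import tempfile

base_path = tempfile.mkdtemp()                       # stands in for host_path
src = "abc123"                                       # hypothetical checkpoint id
os.makedirs(os.path.join(base_path, src))            # pretend a checkpoint exists
dst = os.path.join(tempfile.mkdtemp(), "restored")   # hypothetical destination

try:
    shutil.copytree(os.path.join(base_path, src), dst)
except FileNotFoundError:
    # "from None" suppresses exception chaining, so callers see only the
    # storage-level error rather than the underlying FileNotFoundError.
    raise RuntimeError(f"Did not find checkpoint {src} in shared_fs storage") from None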
def restore_path(self, src: str) -> Iterator[pathlib.Path]:
    """
    Prepare a local directory exposing the checkpoint.

    Do some simple checks to make sure the configuration seems reasonable.
    """
    check.true(
        os.path.exists(self._base_path),
        f"Storage directory does not exist: {self._base_path}. Please verify that you are "
        "using the correct configuration value for checkpoint_storage.host_path",
    )
    storage_dir = os.path.join(self._base_path, src)
    if not os.path.exists(storage_dir):
        raise errors.CheckpointNotFound(f"Did not find checkpoint {src} in shared_fs storage")
    yield pathlib.Path(storage_dir)
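# restore_path yields instead of returning, which suggests it is consumed as a
# context manager. A minimal sketch of that pattern, assuming a hypothetical host_path
# and checkpoint id; whether the real class applies contextlib.contextmanager is an
# assumption, not shown in this excerpt.
import contextlib
import pathlib
import tempfile

@contextlib.contextmanager
def restore_path(base_path: str, src: str):
    storage_dir = pathlib.Path(base_path) / src
    if not storage_dir.exists():
        raise FileNotFoundError(f"Did not find checkpoint {src}")
    yield storage_dir

base = tempfile.mkdtemp()                    # hypothetical host_path
(pathlib.Path(base) / "abc123").mkdir()      # pretend a checkpoint exists
with restore_path(base, "abc123") as ckpt_dir:
    print(list(ckpt_dir.iterdir()))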
def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
    dst = os.fspath(dst)
    logging.info(f"Downloading {src} from Azure Blob Storage")
    found = False
    for blob in self.client.list_files(self.container, file_prefix=src):
        found = True
        _dst = os.path.join(dst, os.path.relpath(blob, src))
        dst_dir = os.path.dirname(_dst)
        os.makedirs(dst_dir, exist_ok=True)

        # Only create an empty directory for keys that end with "/".
        if blob.endswith("/"):
            os.makedirs(_dst, exist_ok=True)
            continue

        # Use posixpath so that we always use forward slashes, even on Windows.
        container_blob = posixpath.join(self.container, blob)
        blob_dir, blob_base = posixpath.split(container_blob)
        self.client.get(blob_dir, blob_base, _dst)

    if not found:
        raise errors.CheckpointNotFound(f"Did not find checkpoint {src} in Azure Blob Storage")
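# A small sketch of the posixpath comment above: blob names always use forward slashes,
# so posixpath keeps the separators stable on every platform, whereas os.path would use
# backslashes on Windows. The container and blob names here are hypothetical.
import posixpath

container = "checkpoints"                   # hypothetical container name
blob = "abc123/state_dict.pth"              # hypothetical blob name
container_blob = posixpath.join(container, blob)
print(container_blob)                        # checkpoints/abc123/state_dict.pth
print(posixpath.split(container_blob))       # ('checkpoints/abc123', 'state_dict.pth')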
def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
    dst = os.fspath(dst)
    prefix = self.get_storage_prefix(src)
    logging.info(f"Downloading {prefix} from S3")
    found = False
    for obj in self.bucket.objects.filter(Prefix=prefix):
        found = True
        _dst = os.path.join(dst, os.path.relpath(obj.key, prefix))
        dst_dir = os.path.dirname(_dst)
        os.makedirs(dst_dir, exist_ok=True)

        logging.debug(f"Downloading s3://{self.bucket_name}/{obj.key} to {_dst}")

        # Only create an empty directory for keys that end with "/".
        # See the `upload` method for more context.
        if obj.key.endswith("/"):
            os.makedirs(_dst, exist_ok=True)
            continue

        self.bucket.download_file(obj.key, _dst)

    if not found:
        raise errors.CheckpointNotFound(f"Did not find {prefix} in S3")
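# A minimal boto3 sketch of the same recursive prefix download. The bucket name,
# prefix, and destination are hypothetical, and AWS credentials are assumed to be
# configured in the environment.
import os
import boto3

bucket = boto3.resource("s3").Bucket("my-bucket")   # hypothetical bucket name
prefix = "checkpoints/abc123/"                      # hypothetical storage prefix
dst = "/tmp/restore"                                # hypothetical local destination

for obj in bucket.objects.filter(Prefix=prefix):
    local = os.path.join(dst, os.path.relpath(obj.key, prefix))
    os.makedirs(os.path.dirname(local), exist_ok=True)
    # Keys ending in "/" are directory markers with no file content to download.
    if obj.key.endswith("/"):
        continue
    bucket.download_file(obj.key, local)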