def store_path(self, storage_id: str = "") -> Iterator[Tuple[str, str]]:
    """
    Prepare a local directory that will become a checkpoint.

    This base implementation creates the temporary directory and chooses a
    random checkpoint ID, but subclasses whose storage backends are in remote
    places are responsible for uploading the data after the files are created
    and deleting the temporary checkpoint directory.
    """
    if storage_id == "":
        storage_id = str(uuid.uuid4())

    # Set umask to 0 in order that the storage dir allows future containers of
    # any owner to create new checkpoints. Administrators wishing to control
    # the permissions more specifically should just create the storage path
    # themselves; this will not interfere.
    old_umask = os.umask(0)
    os.makedirs(self._base_path, exist_ok=True, mode=0o777)
    # Restore the original umask.
    os.umask(old_umask)

    storage_dir = os.path.join(self._base_path, storage_id)
    yield (storage_id, storage_dir)

    check_true(os.path.exists(storage_dir), "Checkpoint did not create a storage directory")

    metadata = StorageMetadata(storage_id, StorageManager._list_directory(storage_dir))
    self.post_store_path(storage_id, storage_dir, metadata)
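# Minimal usage sketch for store_path(). This assumes the method is wrapped
# with contextlib.contextmanager (its single `yield` suggests it) and that
# `manager` is a concrete StorageManager instance; all names below are
# illustrative, not part of this module.
#
#     with manager.store_path() as (storage_id, path):
#         with open(os.path.join(path, "state.bin"), "wb") as f:
#             f.write(serialized_state)  # hypothetical payload
#     # On exit, the files under `path` are listed into StorageMetadata and
#     # post_store_path() is invoked for backend-specific upload/cleanup.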
def store(self, store_data: Storable, storage_id: str = "") -> StorageMetadata:
    """
    Save the object to the backing persistent storage.
    """
    if storage_id == "":
        storage_id = str(uuid.uuid4())

    # Set umask to 0 in order that the storage dir allows future containers of
    # any owner to create new checkpoints. Administrators wishing to control
    # the permissions more specifically should just create the storage path
    # themselves; this will not interfere.
    old_umask = os.umask(0)
    os.makedirs(self._base_path, exist_ok=True, mode=0o777)
    # Restore the original umask.
    os.umask(old_umask)

    storage_dir = os.path.join(self._base_path, storage_id)
    store_data.save(storage_dir)

    check_true(os.path.exists(storage_dir), "Checkpoint did not create a storage directory")

    directory_list = StorageManager._list_directory(storage_dir)

    logging.info(
        "Storing checkpoint {} ({})".format(
            storage_id, sizeof_fmt(sum(directory_list.values()))
        )
    )

    return StorageMetadata(storage_id, directory_list)
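# Illustrative sketch of the contract store() and restore() rely on:
# `store_data` only needs save(path) and load(path) methods, and save() must
# itself create `storage_dir` (store() only creates the base path, then
# checks that the directory exists). `BytesStorable` is a hypothetical
# example, not part of this module.
class BytesStorable:
    def __init__(self, payload: bytes) -> None:
        self.payload = payload

    def save(self, storage_dir: str) -> None:
        # store() verifies this directory exists after save() returns.
        os.makedirs(storage_dir, exist_ok=True)
        with open(os.path.join(storage_dir, "payload.bin"), "wb") as f:
            f.write(self.payload)

    def load(self, storage_dir: str) -> None:
        with open(os.path.join(storage_dir, "payload.bin"), "rb") as f:
            self.payload = f.read()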
def read_single_file(file_path: Optional[pathlib.Path]) -> Tuple[bytes, int]:
    """
    Given a path to a file, return the base64-encoded contents of the file and
    its original size.
    """
    if not file_path:
        return b"", 0

    check.check_true(file_path.is_file(), 'The file at "{}" could not be found'.format(file_path))

    content = file_path.read_bytes()
    return base64.b64encode(content), len(content)
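# Round-trip sketch for read_single_file(); the helper function name and the
# temporary file are illustrative only.
def _read_single_file_example() -> None:
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp:
        tmp.write(b"hello")
    encoded, size = read_single_file(pathlib.Path(tmp.name))
    assert base64.b64decode(encoded) == b"hello"
    assert size == 5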
def delete(self, metadata: StorageMetadata) -> None:
    """
    Delete the stored data from persistent storage.
    """
    storage_dir = os.path.join(self._base_path, metadata.storage_id)

    check_true(
        os.path.exists(storage_dir),
        "Storage directory does not exist: {}".format(storage_dir),
    )
    check_true(
        os.path.isdir(storage_dir), "Storage path is not a directory: {}".format(storage_dir)
    )

    self._remove_checkpoint_directory(metadata.storage_id, ignore_errors=False)
def convert_notebook_to_python_script(notebook_path: str) -> str:
    check.check_true(
        notebook_path.endswith(".ipynb"),
        f"Notebook file {notebook_path} must have a .ipynb suffix",
    )
    processed_cells_path = f"{notebook_path[:-6]}__det__.py"

    with open(notebook_path, "r") as f1, open(processed_cells_path, "w") as f2:
        obj = json.load(f1)
        check.check_true("cells" in obj, f"Invalid notebook file {notebook_path}")
        for cell in obj["cells"]:
            if cell["cell_type"] == "code":
                # Skip IPython shell-escape lines ("!...") since they are not
                # valid Python.
                lines = [line for line in cell["source"] if not line.lstrip().startswith("!")]
                f2.writelines(lines)
                f2.write("\n")
    return processed_cells_path
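# Hypothetical usage: converting "analysis.ipynb" writes its code cells
# (minus "!..." shell-escape lines) to "analysis__det__.py" and returns that
# path; the filename is illustrative only.
#
#     script_path = convert_notebook_to_python_script("analysis.ipynb")
#     assert script_path == "analysis__det__.py"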
def _full_storage_dir(host_path: str, container_path: str, storage_path: Optional[str]) -> str:
    """
    Return the full path to the storage base directory.
    """
    if storage_path is not None:
        # Resolve the storage path relative to the host path and refuse paths
        # that escape it (e.g., via "..").
        abs_path = os.path.normpath(os.path.join(host_path, storage_path))
        check_true(
            abs_path.startswith(host_path), "storage path must be a subdirectory of host path."
        )
        storage_path = os.path.relpath(abs_path, host_path)
        return os.path.join(container_path, storage_path)

    return container_path
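# Worked example with illustrative paths: "checkpoints" resolves to
# "/mnt/nfs/checkpoints", which passes the subdirectory check, while a value
# like "../outside" would normalize outside host_path and fail check_true.
def _full_storage_dir_example() -> None:
    assert _full_storage_dir("/mnt/nfs", "/determined_shared_fs", "checkpoints") == (
        "/determined_shared_fs/checkpoints"
    )
    assert _full_storage_dir("/mnt/nfs", "/determined_shared_fs", None) == "/determined_shared_fs"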
def restore(self, storage_data: Storable, metadata: StorageMetadata) -> None:
    """
    Load the object from the backing persistent storage.
    """
    storage_dir = os.path.join(self._base_path, metadata.storage_id)
    check_true(
        os.path.exists(storage_dir),
        "Storage directory does not exist: {}. Please verify "
        "that you are using the correct configuration value for "
        "checkpoint_storage.host_path.".format(storage_dir),
    )
    check_true(
        os.path.isdir(storage_dir), "Checkpoint path is not a directory: {}".format(storage_dir)
    )

    storage_data.load(storage_dir)
def __init__(
    self,
    host_path: str,
    container_path: str = "/determined_shared_fs",
    storage_path: Optional[str] = None,
    propagation: str = "rprivate",
) -> None:
    # Validate the arguments before they are used to compute the storage
    # directory.
    check_type(host_path, str, "`host_path` must be a str.")
    check_gt(len(host_path), 0, "`host_path` must be non-empty.")
    check_true(os.path.isabs(host_path), "`host_path` must be an absolute path.")
    check_type(container_path, str, "`container_path` must be a str.")
    check_gt(len(container_path), 0, "`container_path` must be non-empty.")
    check_true(os.path.isabs(container_path), "`container_path` must be an absolute path.")
    check_type(propagation, str, "`propagation` must be a str.")

    super().__init__(_full_storage_dir(host_path, container_path, storage_path))

    self.host_path = host_path
    self.container_path = container_path
    self.propagation = propagation
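# Construction sketch, assuming this __init__ belongs to a shared-filesystem
# storage manager class (called SharedFSStorageManager here purely for
# illustration). The resulting manager would operate on
# /determined_shared_fs/checkpoints inside the container, backed by
# /mnt/nfs/checkpoints on the host.
#
#     manager = SharedFSStorageManager(
#         host_path="/mnt/nfs",
#         storage_path="checkpoints",
#     )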
def _list_directory(root: str) -> Dict[str, int]:
    """
    Returns a dict mapping path names to file sizes for all files and
    subdirectories in the directory `root`. Directories are signified by a
    trailing "/". Returned path names are relative to `root`; directories are
    included but have a file size of 0.
    """
    check_true(os.path.isdir(root), "{} must be an extant directory".format(root))
    result = {}
    for cur_path, sub_dirs, files in os.walk(root):
        for d in sub_dirs:
            abs_path = os.path.join(cur_path, d)
            rel_path = os.path.relpath(abs_path, root) + "/"
            result[rel_path] = 0
        for f in files:
            abs_path = os.path.join(cur_path, f)
            rel_path = os.path.relpath(abs_path, root)
            result[rel_path] = os.path.getsize(abs_path)
    return result
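# Shape of the result, sketched for an illustrative tree containing
# model/weights.bin (1024 bytes) and model/meta.json (64 bytes):
#
#     _list_directory(root) == {
#         "model/": 0,
#         "model/weights.bin": 1024,
#         "model/meta.json": 64,
#     }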
def restore_path(self, metadata: StorageMetadata) -> Generator[str, None, None]:
    """
    Prepare a local directory exposing the checkpoint.

    This base implementation does some simple checks to make sure the
    checkpoint has been prepared properly, but subclasses whose storage
    backends are in remote places are responsible for downloading the
    checkpoint before calling this method and deleting the temporary
    checkpoint directory after it is no longer useful.
    """
    storage_dir = os.path.join(self._base_path, metadata.storage_id)
    check_true(
        os.path.exists(storage_dir),
        "Storage directory does not exist: {}. Please verify "
        "that you are using the correct configuration value for "
        "checkpoint_storage.host_path and "
        "tensorboard_storage.host_path.".format(storage_dir),
    )
    check_true(
        os.path.isdir(storage_dir), "Checkpoint path is not a directory: {}".format(storage_dir)
    )

    yield storage_dir
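# Usage sketch for restore_path(), under the same contextlib.contextmanager
# assumption as store_path(); `manager`, `metadata`, and `load_state` are
# illustrative names, not part of this module.
#
#     with manager.restore_path(metadata) as path:
#         state = load_state(os.path.join(path, "state.bin"))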