def _path_to_dir_key(self, path):
    """Build the S3 directory key that corresponds to an fs path.

    The path is normalised and made relative, placed under
    ``self._prefix``, passed through ``forcedir``, stripped of any
    leading slashes, and finally rewritten so that every "/" becomes
    ``self.delimiter``.
    """
    relative = relpath(normpath(path))
    prefixed = "{}/{}".format(self._prefix, relative)
    # Leading slashes are dropped before the delimiter substitution,
    # exactly as in the chained form.
    dir_key = forcedir(prefixed).lstrip('/')
    return dir_key.replace('/', self.delimiter)
def fix_storage(self) -> None:  # TODO test
    """Create the directory markers that are missing below `root_path`.

    As GCS is no real file system but only a key-value store, there is
    also no concept of folders. S3FS and GCSFS overcome this limitation
    by adding empty files with the name "<path>/" every time a directory
    is created, see
    https://fs-gcsfs.readthedocs.io/en/latest/#limitations.

    Every blob listed with the prefix `root_path` is inspected: blobs
    whose name ends with "/" count as existing markers, and each
    intermediate directory between a blob and `root_path` is recorded.
    Any recorded directory without a marker gets an empty "<dir>/" blob
    uploaded.

    Blobs that match the listing prefix but are not located beneath
    `root_path` are ignored, so the upward walk always terminates.
    """
    marked_dirs = set()
    all_dirs = set()

    for blob in self.bucket.list_blobs(prefix=self.root_path):
        name = blob.name

        # If a blob ends with a slash, it's a directory marker
        if name.endswith("/"):
            marked_dirs.add(dirname(name))

        # Walk upwards from the blob's parent until root_path is reached.
        ancestors = []
        current = dirname(name)
        while current != self.root_path:
            parent = dirname(current)
            if parent == current:
                # dirname() no longer makes progress, i.e. the top was
                # reached without passing root_path. The listing prefix is
                # a plain string prefix, so such blobs can show up (e.g.
                # "foobar/x" for root "foo"); without this guard the loop
                # would never end.
                logger.debug("Ignoring blob outside of root path: " + name)
                ancestors = []
                break
            ancestors.append(current)
            current = parent
        all_dirs.update(ancestors)

    # The root itself needs a marker unless it is the bucket root.
    if forcedir(self.root_path) != "/":
        all_dirs.add(self.root_path)

    unmarked_dirs = all_dirs.difference(marked_dirs)
    logger.info("{} directories in total".format(len(all_dirs)))

    if unmarked_dirs:
        logger.info("{} directories are not yet marked correctly".format(
            len(unmarked_dirs)))
        for unmarked_dir in unmarked_dirs:
            dir_name = forcedir(unmarked_dir)
            logger.debug("Creating directory marker " + dir_name)
            blob = self.bucket.blob(dir_name)
            blob.upload_from_string(b"")
        logger.info("Successfully created {} directory markers".format(
            len(unmarked_dirs)))
    else:
        logger.info("All directories are correctly marked")
def templates_directory(self):
    """Return the templates directory, resolving it on first access.

    While ``self._templates_directory`` is ``None`` the value is taken
    from ``self.system_settings['templates_directory']``, passed through
    ``abspath`` and ``forcedir``, and stored for later calls.
    """
    # Fast path: already resolved on an earlier call.
    if self._templates_directory is not None:
        return self._templates_directory

    configured = self.system_settings['templates_directory']
    self._templates_directory = forcedir(abspath(configured))
    return self._templates_directory
def templates_directory(self):
    """Return the cached templates directory, computing it if unset.

    The first call (while ``self._templates_directory`` is ``None``)
    reads ``self.system_settings['templates_directory']``, applies
    ``abspath`` followed by ``forcedir`` and remembers the result on the
    instance; later calls return the remembered value.
    """
    if self._templates_directory is None:
        setting = self.system_settings['templates_directory']
        absolute = abspath(setting)
        self._templates_directory = forcedir(absolute)
    return self._templates_directory
def format_dir(path):
    """Turn a prefixed path into a bare directory name.

    The first ``prefix_len`` characters are cut off, leading slashes are
    removed from the remainder, and the result is passed through
    ``forcedir``.

    NOTE(review): ``prefix_len`` is not defined in this block; presumably
    it comes from the enclosing scope -- confirm against the caller.
    """
    remainder = path[prefix_len:]
    return forcedir(remainder.lstrip("/"))
def _path_to_dir_key(self, path):
    """Map an fs path to the matching Datalake directory path.

    The path is normalised and made relative, placed under
    ``self._prefix``, passed through ``forcedir`` and stripped of any
    leading slashes.
    """
    normalised = relpath(normpath(path))
    joined = "{}/{}".format(self._prefix, normalised)
    return forcedir(joined).lstrip("/")
def test_forcedir(self):
    # forcedir must yield a path ending in "/" whether or not the input
    # already carries one. Cases run in order; the first mismatch fails
    # the test.
    expectations = (
        ("foo", "foo/"),
        ("foo/", "foo/"),
    )
    for given, wanted in expectations:
        self.assertEqual(forcedir(given), wanted)