def test_file_ops(tmpdir):
    """Smoke-test basic LocalFileSystem operations end to end.

    Covers info on a missing path, touch, ukey change after a write,
    ls, copy, move, and recursive rm.
    """
    base = str(tmpdir)
    fs = LocalFileSystem()
    afile = f"{base}/afile"

    # info() on a nonexistent path must raise.
    with pytest.raises(FileNotFoundError):
        fs.info(f"{base}/nofile")

    fs.touch(afile)
    key_before = fs.ukey(afile)
    assert afile in fs.ls(base)

    with fs.open(afile, "wb") as f:
        f.write(b"data")
    key_after = fs.ukey(afile)
    assert key_before != key_after  # because file changed

    fs.copy(afile, f"{base}/afile2")
    assert f"{base}/afile2" in fs.ls(base)

    fs.move(afile, f"{base}/afile3")
    assert not fs.exists(afile)

    fs.rm(f"{base}/afile3", recursive=True)
    assert not fs.exists(f"{base}/afile3")

    fs.rm(base, recursive=True)
    assert not fs.exists(base)
def test_file_ops(tmpdir):
    """Verify core file operations on LocalFileSystem.

    Sequence: missing-file info raises, touch/ukey/ls, write changes
    the ukey, copy, move, then recursive removal of files and the dir.
    """
    root = str(tmpdir)
    fs = LocalFileSystem()
    afile = root + '/afile'
    afile2 = root + '/afile2'
    afile3 = root + '/afile3'

    with pytest.raises(FileNotFoundError):
        fs.info(root + '/nofile')

    fs.touch(afile)
    original_key = fs.ukey(afile)
    assert afile in fs.ls(root)

    with fs.open(afile, 'wb') as f:
        f.write(b'data')
    assert original_key != fs.ukey(afile)  # because file changed

    fs.copy(afile, afile2)
    assert afile2 in fs.ls(root)

    fs.move(afile, afile3)
    assert not fs.exists(afile)

    fs.rm(afile3, recursive=True)
    assert not fs.exists(afile3)

    fs.rm(root, recursive=True)
    assert not fs.exists(root)
def test_linked_files_exists(tmpdir):
    """Check symlink semantics: exists() follows links, lexists() does not."""
    source = tmpdir / "original"
    link = tmpdir / "copy"
    fs = LocalFileSystem()
    fs.touch(source)

    try:
        os.symlink(source, link)
    except OSError:
        # Creating symlinks on Windows requires elevated privileges.
        if WIN:
            pytest.xfail("Ran on win without admin permissions")
        else:
            raise

    # Live link: both checks succeed.
    assert fs.exists(link)
    assert fs.lexists(link)

    # Dangling link: the target is gone, but the link entry remains.
    os.unlink(source)
    assert not fs.exists(link)
    assert fs.lexists(link)

    # Link removed: neither check succeeds.
    os.unlink(link)
    assert not fs.exists(link)
    assert not fs.lexists(link)
def filesystem() -> AbstractFileSystem:
    """Return the filesystem used for app shared storage.

    Uses an S3 filesystem when the bucket endpoint/name environment
    variables are configured, otherwise falls back to the local
    filesystem.

    Returns:
        An ``AbstractFileSystem`` whose shared storage path exists.

    Raises:
        RuntimeError: if S3 is configured but credentials are missing,
            if ``LIGHTNING_CLOUD_APP_ID`` is unset, or if the shared
            storage path does not exist on the selected filesystem.
    """
    fs: AbstractFileSystem = LocalFileSystem()
    endpoint_url = os.getenv("LIGHTNING_BUCKET_ENDPOINT_URL", "")
    bucket_name = os.getenv("LIGHTNING_BUCKET_NAME", "")
    if endpoint_url and bucket_name:
        key = os.getenv("LIGHTNING_AWS_ACCESS_KEY_ID", "")
        secret = os.getenv("LIGHTNING_AWS_SECRET_ACCESS_KEY", "")
        # TODO: Remove when updated on the platform side.
        if not key or not secret:
            # Fall back to the standard AWS environment variables.
            key = os.getenv("AWS_ACCESS_KEY_ID", "")
            secret = os.getenv("AWS_SECRET_ACCESS_KEY", "")
        if not key or not secret:
            raise RuntimeError("missing S3 bucket credentials")
        fs = S3FileSystem(key=key, secret=secret, use_ssl=False, client_kwargs={"endpoint_url": endpoint_url})

    app_id = os.getenv("LIGHTNING_CLOUD_APP_ID", "")
    if not app_id:
        raise RuntimeError("missing LIGHTNING_CLOUD_APP_ID")

    # Hoisted: shared_storage_path() was previously evaluated twice.
    shared_path = shared_storage_path()
    if not fs.exists(shared_path):
        raise RuntimeError(f"shared filesystem {shared_path} does not exist")
    return fs
def test_commit_discard(tmpdir):
    """Transactions defer writes until commit and roll back on exceptions."""
    base = str(tmpdir)
    fs = LocalFileSystem()

    # Inside the transaction nothing is visible; the write lands on commit.
    with fs.transaction:
        with fs.open(base + "/afile", "wb") as f:
            assert not fs.exists(base + "/afile")
            f.write(b"data")
        assert not fs.exists(base + "/afile")
    assert fs.cat(base + "/afile") == b"data"

    # An exception inside the transaction discards the pending write.
    try:
        with fs.transaction:
            with fs.open(base + "/bfile", "wb") as f:
                f.write(b"data")
            raise KeyboardInterrupt
    except KeyboardInterrupt:
        assert not fs.exists(base + "/bfile")
def test_commit_discard(tmpdir):
    """Ensure transactional writes commit on success and are discarded on error."""
    d = str(tmpdir)
    fs = LocalFileSystem()
    afile = d + '/afile'
    bfile = d + '/bfile'

    with fs.transaction:
        with fs.open(afile, 'wb') as f:
            # File must not materialize while the transaction is open.
            assert not fs.exists(afile)
            f.write(b'data')
        assert not fs.exists(afile)
    # Clean exit commits the write.
    assert fs.cat(afile) == b'data'

    try:
        with fs.transaction:
            with fs.open(bfile, 'wb') as f:
                f.write(b'data')
            raise KeyboardInterrupt
    except KeyboardInterrupt:
        # The aborted transaction must not leave the file behind.
        assert not fs.exists(bfile)
def convert_dicom(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Convert a DICOM folder into a compressed NIfTI file with dcm2niix.

    Args:
        sourcedata_dir: Directory containing the source DICOM files.
        bids_filename: Target BIDS filename (``*.nii.gz``); its parent
            directory is wiped and recreated, and its stem is used as
            the dcm2niix output format string.

    Raises:
        subprocess.CalledProcessError: if dcm2niix exits with a
            non-zero status (previously this failure was silent).
    """
    import subprocess
    from pathlib import Path

    from fsspec.implementations.local import LocalFileSystem

    output_fmt = str(Path(bids_filename).name).replace(".nii.gz", "")
    output_dir = str(Path(bids_filename).parent)

    # Ensure output directory is empty.
    fs = LocalFileSystem()
    if fs.exists(output_dir):
        fs.rm(output_dir, recursive=True)
    fs.makedirs(output_dir)

    # Run conversion with dcm2niix with anonymization (-b y -ba y) and
    # maximum compression (-9, -z i).
    # SECURITY: pass an argv list (shell=False) so paths containing spaces
    # or shell metacharacters are not interpreted by a shell; check=True
    # surfaces conversion failures instead of ignoring the exit code.
    subprocess.run(
        [
            "dcm2niix",
            "-9",
            "-b", "y",
            "-ba", "y",
            "-f", output_fmt,
            "-o", output_dir,
            "-z", "i",
            str(sourcedata_dir),
        ],
        check=True,
    )
def test_file_ops(tmpdir):
    """Exercise LocalFileSystem(auto_mkdir=True) file operations.

    Covers missing-file info, touch/ukey/ls, write-triggered ukey change,
    copy/move, auto-mkdir on cp into a nested path, rm on lists vs single
    files, and the directory-removal error behavior of rm_file.
    """
    base = make_path_posix(str(tmpdir))
    fs = LocalFileSystem(auto_mkdir=True)

    with pytest.raises(FileNotFoundError):
        fs.info(f"{base}/nofile")

    fs.touch(f"{base}/afile")
    key_before = fs.ukey(f"{base}/afile")
    assert f"{base}/afile" in fs.ls(base)

    with fs.open(f"{base}/afile", "wb") as f:
        f.write(b"data")
    assert key_before != fs.ukey(f"{base}/afile")  # because file changed

    fs.copy(f"{base}/afile", f"{base}/afile2")
    assert f"{base}/afile2" in fs.ls(base)

    fs.move(f"{base}/afile", f"{base}/afile3")
    assert not fs.exists(f"{base}/afile")

    # auto_mkdir creates the intermediate directories on cp.
    fs.cp(f"{base}/afile3", f"{base}/deeply/nested/file")
    assert fs.exists(f"{base}/deeply/nested/file")

    fs.rm(f"{base}/afile3", recursive=True)
    assert not fs.exists(f"{base}/afile3")

    files = [f"{base}/afile4", f"{base}/afile5"]
    for path in files:
        fs.touch(path)
    # rm_file only accepts a single path, not a list.
    with pytest.raises(TypeError):
        fs.rm_file(files)
    fs.rm(files)
    assert all(not fs.exists(path) for path in files)

    fs.touch(f"{base}/afile6")
    fs.rm_file(f"{base}/afile6")
    assert not fs.exists(f"{base}/afile6")

    # IsADirectoryError raised on Linux, PermissionError on Windows
    with pytest.raises((IsADirectoryError, PermissionError)):
        fs.rm_file(base)

    fs.rm(base, recursive=True)
    assert not fs.exists(base)
class LocalFileSystem(BaseFileSystem):
    """Local-disk filesystem backend.

    Thin wrapper combining ``os``/``os.path`` calls, project helpers
    (``System``, ``copyfile``, ``move``, ``remove``, ``makedirs``) and an
    fsspec ``LocalFileSystem`` instance (``self.fs``) used for ``exists``.
    Uploads/copies go through a temp file and an atomic rename.
    """

    scheme = Schemes.LOCAL
    PATH_CLS = PathInfo
    PARAM_CHECKSUM = "md5"
    PARAM_PATH = "path"
    TRAVERSE_PREFIX_LEN = 2

    def __init__(self, **config):
        # Imported lazily so fsspec is only required when this backend is used.
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(**config)
        self.fs = LocalFS()

    @staticmethod
    def open(path_info, mode="r", encoding=None, **kwargs):
        """Open a local file; extra kwargs are accepted but ignored."""
        # `open` here is the builtin, not this method (class scope is not
        # visible inside method bodies).
        return open(path_info, mode=mode, encoding=encoding)

    def exists(self, path_info) -> bool:
        """Return True if the path exists on the local filesystem."""
        assert isinstance(path_info, str) or path_info.scheme == "local"
        return self.fs.exists(path_info)

    def isfile(self, path_info) -> bool:
        """Return True if the path is a regular file."""
        return os.path.isfile(path_info)

    def isdir(self, path_info) -> bool:
        """Return True if the path is a directory."""
        return os.path.isdir(path_info)

    def iscopy(self, path_info):
        """Return True if the path is a real copy (neither symlink nor hardlink)."""
        return not (System.is_symlink(path_info) or System.is_hardlink(path_info))

    def walk(self, top, topdown=True, onerror=None, **kwargs):
        """Directory fs generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks
        """
        for root, dirs, files in os.walk(top, topdown=topdown, onerror=onerror):
            yield os.path.normpath(root), dirs, files

    def walk_files(self, path_info, **kwargs):
        """Yield a PathInfo for every file under ``path_info``, recursively."""
        for root, _, files in self.walk(path_info):
            for file in files:
                # NOTE: os.path.join is ~5.5 times slower
                yield PathInfo(f"{root}{os.sep}{file}")

    def is_empty(self, path_info):
        """Return True for a zero-byte file or a directory with no entries."""
        if self.isfile(path_info) and os.path.getsize(path_info) == 0:
            return True

        if self.isdir(path_info) and len(os.listdir(path_info)) == 0:
            return True

        return False

    def remove(self, path_info):
        """Remove a local path; reject PathInfo objects with a non-local scheme."""
        if isinstance(path_info, PathInfo):
            if path_info.scheme != "local":
                raise NotImplementedError

        remove(path_info)

    def makedirs(self, path_info):
        """Create the directory (and parents), ignoring pre-existing dirs."""
        makedirs(path_info, exist_ok=True)

    def isexec(self, path_info):
        """Return True if the path has an executable bit set."""
        mode = self.stat(path_info).st_mode
        return is_exec(mode)

    def stat(self, path):
        """Return ``os.stat`` result for the path."""
        return os.stat(path)

    def move(self, from_info, to_info):
        """Move a local path, creating the destination's parent first."""
        if from_info.scheme != "local" or to_info.scheme != "local":
            raise NotImplementedError

        self.makedirs(to_info.parent)
        move(from_info, to_info)

    def copy(self, from_info, to_info):
        """Copy via a temp file + rename so the destination appears atomically."""
        tmp_info = to_info.parent / tmp_fname("")
        try:
            copyfile(from_info, tmp_info)
            os.rename(tmp_info, to_info)
        except Exception:
            # Don't leave the partially-written temp file behind.
            self.remove(tmp_info)
            raise

    def _upload_fobj(self, fobj, to_info):
        """Write a file object to ``to_info`` via a temp file + atomic rename."""
        self.makedirs(to_info.parent)
        tmp_info = to_info.parent / tmp_fname("")
        try:
            copy_fobj_to_file(fobj, tmp_info)
            os.rename(tmp_info, to_info)
        except Exception:
            # Clean up the temp file on any failure.
            self.remove(tmp_info)
            raise

    @staticmethod
    def symlink(from_info, to_info):
        """Create a symlink at ``to_info`` pointing to ``from_info``."""
        System.symlink(from_info, to_info)

    @staticmethod
    def is_symlink(path_info):
        """Return True if the path is a symlink."""
        return System.is_symlink(path_info)

    def hardlink(self, from_info, to_info):
        """Hardlink ``from_info`` to ``to_info``; empty files are copied instead."""
        # If there are a lot of empty files (which happens a lot in datasets),
        # and the cache type is `hardlink`, we might reach link limits and
        # will get something like: `too many links error`
        #
        # This is because all those empty files will have the same hash
        # (i.e. 68b329da9893e34099c7d8ad5cb9c940), therefore, they will be
        # linked to the same file in the cache.
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        # * ext4 limits the number of hard links on a file to 65,000
        # * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # That's why we simply create an empty file rather than a link.
        if self.getsize(from_info) == 0:
            self.open(to_info, "w").close()

            logger.debug("Created empty file: %s -> %s", from_info, to_info)
            return

        System.hardlink(from_info, to_info)

    @staticmethod
    def is_hardlink(path_info):
        """Return True if the path is a hardlink."""
        return System.is_hardlink(path_info)

    def reflink(self, from_info, to_info):
        """Create a reflink (copy-on-write clone) where supported."""
        System.reflink(from_info, to_info)

    @staticmethod
    def info(path_info):
        """Return a dict with the path's size and type ("dir" or "file")."""
        st = os.stat(path_info)
        return {
            "size": st.st_size,
            "type": "dir" if stat.S_ISDIR(st.st_mode) else "file",
        }

    def _upload(self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs):
        """Copy ``from_file`` into place via temp file + ``os.replace``."""
        makedirs(to_info.parent, exist_ok=True)

        tmp_file = tmp_fname(to_info)
        copyfile(from_file, tmp_file, name=name, no_progress_bar=no_progress_bar)

        os.replace(tmp_file, to_info)

    @staticmethod
    def _download(from_info, to_file, name=None, no_progress_bar=False, **_kwargs):
        """Copy a local source to ``to_file`` with optional progress reporting."""
        copyfile(from_info, to_file, no_progress_bar=no_progress_bar, name=name)
def __exit__(self, *_):
    """Clean up: remove the temporary path when leaving the context."""
    local_fs = LocalFileSystem()
    if local_fs.exists(self.tmp_path):
        local_fs.rm(self.tmp_path, recursive=True)
def test_info_path_like(tmpdir):
    """LocalFileSystem should accept os.PathLike objects, not just strings."""
    target = Path(tmpdir / "test_info")
    target.write_text("fsspec")
    assert LocalFileSystem().exists(target)