def test_file_ops(tmpdir):
    """Walk a single file through touch, write, copy, move and rm."""
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()

    # Paths used throughout the scenario.
    first = tmpdir + "/afile"
    duplicate = tmpdir + "/afile2"
    renamed = tmpdir + "/afile3"

    # A path that was never created must be reported as missing.
    with pytest.raises(FileNotFoundError):
        fs.info(tmpdir + "/nofile")

    fs.touch(first)
    key_before = fs.ukey(first)
    assert first in fs.ls(tmpdir)

    with fs.open(first, "wb") as handle:
        handle.write(b"data")
    key_after = fs.ukey(first)
    assert key_before != key_after  # because file changed

    fs.copy(first, duplicate)
    assert duplicate in fs.ls(tmpdir)

    fs.move(first, renamed)
    assert not fs.exists(first)

    fs.rm(renamed, recursive=True)
    assert not fs.exists(renamed)

    # Finally remove the whole temporary tree.
    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
def test_file_ops(tmpdir):
    """Check basic file operations (touch/open/copy/move/rm) on LocalFileSystem."""
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()

    def under(name):
        # Build a path inside the temporary directory.
        return tmpdir + '/' + name

    with pytest.raises(FileNotFoundError):
        fs.info(under('nofile'))

    # Create an empty file and remember its unique key.
    fs.touch(under('afile'))
    ukey_empty = fs.ukey(under('afile'))
    assert under('afile') in fs.ls(tmpdir)

    # Rewriting the file is expected to yield a different key.
    with fs.open(under('afile'), 'wb') as out:
        out.write(b'data')
    ukey_written = fs.ukey(under('afile'))
    assert ukey_empty != ukey_written  # because file changed

    fs.copy(under('afile'), under('afile2'))
    assert under('afile2') in fs.ls(tmpdir)

    fs.move(under('afile'), under('afile3'))
    assert not fs.exists(under('afile'))

    fs.rm(under('afile3'), recursive=True)
    assert not fs.exists(under('afile3'))

    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
def install_bids(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Copy the first entry of ``sourcedata_dir`` to ``bids_filename``, plus sidecars.

    The first path returned by ``fs.ls(sourcedata_dir)`` is copied byte for
    byte to ``bids_filename``. Afterwards, every file found recursively under
    ``<sourcedata_dir>/../BIDS`` whose name starts with the source basename
    (two trailing suffixes removed, e.g. ``.nii.gz``) is copied next to
    ``bids_filename``, renamed to the target basename while keeping the
    sidecar's own suffix.

    Args:
        sourcedata_dir: Directory holding the source file. Only the first
            listed entry is used.
        bids_filename: Destination path of the main file. The filesystem is
            created with ``auto_mkdir=True``.

    Raises:
        IndexError: If ``sourcedata_dir`` lists no entries.
    """
    import shutil
    from pathlib import Path

    from fsspec.implementations.local import LocalFileSystem

    # Accept plain strings as well as Path objects: the code below relies on
    # ``.stem`` / ``.parent`` / ``.with_name``.
    sourcedata_dir = Path(sourcedata_dir)
    bids_filename = Path(bids_filename)

    fs = LocalFileSystem(auto_mkdir=True)

    # List once so the copied file and the basename used for the sidecar
    # lookup are guaranteed to refer to the same entry.
    source_path = fs.ls(sourcedata_dir)[0]

    # Opening both files inside one ``with`` closes the source even when
    # opening the target fails; copyfileobj streams in chunks instead of
    # loading the whole file in memory.
    with fs.open(source_path, mode="rb") as sf, fs.open(
        bids_filename, mode="wb"
    ) as tf:
        shutil.copyfileobj(sf, tf)

    # Strip two suffixes from the source (e.g. ".nii.gz") and likewise from
    # the target to obtain bare basenames.
    source_basename = Path(Path(Path(source_path).stem).stem)
    target_basename = Path(bids_filename.stem).stem

    # The following part adds the sidecar files related to the nifti with the
    # same name: it can be tsv or json files. It may or may not be used, since
    # there might not be any sidecars.
    sidecar_dir = sourcedata_dir.parent / "BIDS"
    for source_sidecar in sidecar_dir.rglob(f"{source_basename}*"):
        target_sidecar = bids_filename.with_name(
            f"{target_basename}{source_sidecar.suffix}"
        )
        with fs.open(source_sidecar, mode="rb") as sf, fs.open(
            target_sidecar, mode="wb"
        ) as tf:
            shutil.copyfileobj(sf, tf)
def test_directories(tmpdir):
    """Create a directory, check how it is listed, then remove it."""
    base = str(tmpdir)
    fs = LocalFileSystem()
    subdir = base + "/dir"

    fs.mkdir(subdir)
    assert subdir in fs.ls(base)

    # With detail enabled, each entry is a mapping carrying a "type" key.
    first_entry = fs.ls(base, True)[0]
    assert first_entry["type"] == "directory"

    fs.rmdir(subdir)
    assert not fs.ls(base)
def test_directories(tmpdir):
    """Verify mkdir / ls / rmdir round-trip on a LocalFileSystem."""
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    new_dir = '/'.join([tmpdir, 'dir'])

    # The new directory must show up in the plain listing ...
    fs.mkdir(new_dir)
    names = fs.ls(tmpdir)
    assert new_dir in names

    # ... and be reported as a directory in the detailed listing.
    detailed = fs.ls(tmpdir, True)
    assert detailed[0]['type'] == 'directory'

    # Once removed, the temporary directory is empty again.
    fs.rmdir(new_dir)
    remaining = fs.ls(tmpdir)
    assert not remaining
def test_directories(tmpdir):
    """Check directory creation/removal and that the root marker can be listed."""
    root = make_path_posix(str(tmpdir))
    fs = LocalFileSystem()
    target = f"{root}/dir"

    fs.mkdir(target)
    assert target in fs.ls(root)

    entries = fs.ls(root, True)
    assert entries[0]["type"] == "directory"

    fs.rmdir(target)
    assert not fs.ls(root)

    # Listing the filesystem root marker must return something non-empty.
    assert fs.ls(fs.root_marker)
def test_file_ops(tmpdir):
    """Exercise file operations, including nested copy, rm of lists and rm_file.

    The filesystem is built with ``auto_mkdir=True``; the test expects ``cp``
    into a not-yet-existing nested directory to succeed.
    """
    root = make_path_posix(str(tmpdir))
    fs = LocalFileSystem(auto_mkdir=True)

    original = root + "/afile"
    copied = root + "/afile2"
    moved = root + "/afile3"
    nested = root + "/deeply/nested/file"
    single = root + "/afile6"

    with pytest.raises(FileNotFoundError):
        fs.info(root + "/nofile")

    fs.touch(original)
    key_empty = fs.ukey(original)
    assert original in fs.ls(root)

    with fs.open(original, "wb") as sink:
        sink.write(b"data")
    key_filled = fs.ukey(original)
    assert key_empty != key_filled  # because file changed

    fs.copy(original, copied)
    assert copied in fs.ls(root)

    fs.move(original, moved)
    assert not fs.exists(original)

    fs.cp(moved, nested)
    assert fs.exists(nested)

    fs.rm(moved, recursive=True)
    assert not fs.exists(moved)

    # rm_file rejects a list of paths, whereas rm accepts one.
    files = [root + "/afile4", root + "/afile5"]
    for path in files:
        fs.touch(path)
    with pytest.raises(TypeError):
        fs.rm_file(files)
    fs.rm(files)
    assert not any(fs.exists(path) for path in files)

    fs.touch(single)
    fs.rm_file(single)
    assert not fs.exists(single)

    # IsADirectoryError raised on Linux, PermissionError on Windows
    with pytest.raises((IsADirectoryError, PermissionError)):
        fs.rm_file(root)

    fs.rm(root, recursive=True)
    assert not fs.exists(root)
def install_nifti(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Copy the first entry of ``sourcedata_dir`` to ``bids_filename`` through gzip.

    The first path returned by ``fs.ls(sourcedata_dir)`` is read in binary
    mode and written to ``bids_filename``, which is opened with
    ``compression="gzip"``.

    Args:
        sourcedata_dir: Directory holding the source image. Only the first
            listed entry is used.
        bids_filename: Destination path. The filesystem is created with
            ``auto_mkdir=True``.

    Raises:
        IndexError: If ``sourcedata_dir`` lists no entries.
    """
    import shutil

    from fsspec.implementations.local import LocalFileSystem

    fs = LocalFileSystem(auto_mkdir=True)
    source_path = fs.ls(sourcedata_dir)[0]

    # Opening both files in a single ``with`` closes the source even when
    # opening the target fails; copyfileobj streams in chunks so the whole
    # image is never held in memory at once.
    with fs.open(source_path, mode="rb") as sf, fs.open(
        bids_filename, mode="wb", compression="gzip"
    ) as tf:
        shutil.copyfileobj(sf, tf)
class FsspecLocalFileSystem(AbstractFileSystem):
    """Local-disk filesystem combining ``os`` calls with an fsspec ``LocalFileSystem``.

    ``info`` and ``ls`` are delegated to the wrapped fsspec instance stored in
    ``self.fs``; the other operations call ``os``, module-level helpers
    (``makedirs``, ``copyfile``, ``move``, ``remove``, ``tmp_fname``) or
    ``System`` directly.
    """

    sep = os.sep

    def __init__(self, *args, **kwargs):
        """Initialise the base class and create the wrapped fsspec filesystem."""
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(*args, **kwargs)
        self.fs = LocalFS()

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` through the module-level ``makedirs`` helper."""
        makedirs(path, exist_ok=exist_ok)

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create directory ``path``; raise ``FileExistsError`` if it already exists.

        Without ``create_parents`` a plain ``os.mkdir`` is issued and
        ``kwargs`` are forwarded to it.
        """
        if self.exists(path):
            raise FileExistsError(path)
        if not create_parents:
            os.mkdir(path, **kwargs)
            return
        self.makedirs(path, exist_ok=True)

    def lexists(self, path, **kwargs):
        """Return ``os.path.lexists(path)``."""
        return os.path.lexists(path)

    def exists(self, path, **kwargs):
        """Return whether ``path`` exists, using ``os.path.lexists``."""
        # TODO: replace this with os.path.exists once the problem is fixed on
        # the fsspec https://github.com/intake/filesystem_spec/issues/742
        return os.path.lexists(path)

    def checksum(self, path) -> str:
        """Return a decimal-string token derived from inode, mtime and size."""
        from fsspec.utils import tokenize

        stat_result = os.stat(path)
        fingerprint = [
            stat_result.st_ino,
            stat_result.st_mtime,
            stat_result.st_size,
        ]
        hex_token = tokenize(fingerprint)
        return str(int(hex_token, 16))

    def info(self, path, **kwargs):
        """Delegate to the wrapped filesystem's ``info`` (``kwargs`` are not forwarded)."""
        return self.fs.info(path)

    def ls(self, path, **kwargs):
        """Delegate to the wrapped filesystem's ``ls``."""
        return self.fs.ls(path, **kwargs)

    def isfile(self, path) -> bool:
        """Return ``os.path.isfile(path)``."""
        return os.path.isfile(path)

    def isdir(self, path) -> bool:
        """Return ``os.path.isdir(path)``."""
        return os.path.isdir(path)

    def walk(self, path, maxdepth=None, topdown=True, **kwargs):
        """Yield ``(root, dirs, files)`` triples from ``os.walk``.

        Each ``root`` is passed through ``os.path.normpath``. Only ``topdown``
        is handed to ``os.walk``; ``maxdepth`` and ``kwargs`` are accepted but
        not used here.

        Differences from ``os.walk``:
            - no support for symlinks
        """
        for top, dirnames, filenames in os.walk(path, topdown=topdown):
            yield os.path.normpath(top), dirnames, filenames

    def find(self, path, **kwargs):
        """Yield the path of every file found by ``walk`` under ``path``."""
        for root, _, files in self.walk(path, **kwargs):
            # NOTE: os.path.join is ~5.5 times slower
            yield from (f"{root}{os.sep}{name}" for name in files)

    @classmethod
    def _parent(cls, path):
        """Return the directory component of ``path``."""
        return os.path.dirname(path)

    def put_file(self, lpath, rpath, callback=None, **kwargs):
        """Copy ``lpath`` to ``rpath`` through a temporary file in the target directory.

        The data is first copied to a temporary name next to ``rpath`` and
        then moved into place with ``os.replace``.
        """
        dest_dir = self._parent(rpath)
        makedirs(dest_dir, exist_ok=True)
        staging = os.path.join(dest_dir, tmp_fname())
        copyfile(lpath, staging, callback=callback)
        os.replace(staging, rpath)

    def get_file(self, rpath, lpath, callback=None, **kwargs):
        """Copy ``rpath`` to ``lpath`` with the ``copyfile`` helper."""
        copyfile(rpath, lpath, callback=callback)

    def mv(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2``, creating the destination's parent first."""
        destination_parent = self._parent(path2)
        self.makedirs(destination_parent, exist_ok=True)
        move(path1, path2)

    def rmdir(self, path):
        """Remove directory ``path`` via ``os.rmdir``."""
        os.rmdir(path)

    def rm_file(self, path):
        """Remove ``path`` with the module-level ``remove`` helper."""
        remove(path)

    def rm(self, path, recursive=False, maxdepth=None):
        """Remove ``path`` with ``remove``; ``recursive`` and ``maxdepth`` are unused."""
        remove(path)

    def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
        """Copy ``path1`` to ``path2`` through a temporary file, cleaning up on failure.

        On any exception the temporary file is removed and the exception is
        re-raised. ``recursive``, ``on_error`` and ``kwargs`` are not used.
        """
        staging = os.path.join(self._parent(path2), tmp_fname(""))
        try:
            copyfile(path1, staging)
            os.rename(staging, path2)
        except Exception:
            self.rm_file(staging)
            raise

    def open(self, path, mode="r", encoding=None, **kwargs):
        """Open ``path`` with the builtin ``open``; extra ``kwargs`` are ignored."""
        return open(path, mode=mode, encoding=encoding)

    def symlink(self, path1, path2):
        """Delegate to ``System.symlink``."""
        return System.symlink(path1, path2)

    @staticmethod
    def is_symlink(path):
        """Delegate to ``System.is_symlink``."""
        return System.is_symlink(path)

    @staticmethod
    def is_hardlink(path):
        """Delegate to ``System.is_hardlink``."""
        return System.is_hardlink(path)

    def hardlink(self, path1, path2):
        """Hard-link ``path1`` to ``path2``, or create an empty file for empty sources."""
        if self.size(path1) != 0:
            return System.hardlink(path1, path2)

        # Empty files are very common in datasets. With the `hardlink` cache
        # type they would all be linked to one and the same cache entry,
        # because they share a hash (i.e. 68b329da9893e34099c7d8ad5cb9c940),
        # and we could hit the per-file link limit (`too many links error`).
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        #   * ext4 limits the number of hard links on a file to 65,000
        #   * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # So an empty file is simply created instead of a link.
        self.open(path2, "w").close()
        logger.debug("Created empty file: %s -> %s", path1, path2)
        return None

    def reflink(self, path1, path2):
        """Delegate to ``System.reflink``."""
        return System.reflink(path1, path2)
class FileSystem(AbstractFileSystem):
    """Wrapper for easier initialization of various file-system classes.

    Depending on ``name`` the instance wraps either a ``LocalFileSystem`` or
    an ``s3fs.S3FileSystem`` in ``self.filesystem``. Every bound method of the
    wrapped object is copied onto the wrapper, except for the handful that
    this class defines explicitly below.
    """

    def __init__(self, name="local", assumed_role=None, endpoint_url=None):
        """Create the wrapped filesystem.

        Args:
            name: ``"local"`` or ``"s3"``.
            assumed_role: Role ARN passed to STS ``assume_role`` (S3 only).
                When set, refreshable credentials are installed on the
                botocore session.
            endpoint_url: Custom endpoint, forwarded to S3 as
                ``client_kwargs["endpoint_url"]``.

        Raises:
            ValueError: If ``name`` is neither ``"local"`` nor ``"s3"``.
        """
        super().__init__()
        self.name = name
        self.assume_client = None
        self.assume_role = assumed_role
        self.endpoint_url = endpoint_url

        if self.name == "local":
            self.filesystem = LocalFileSystem()
        elif self.name == "s3":
            session = botocore.session.get_session()
            if self.assume_role:
                # The STS client must exist before _sts_refresh is invoked
                # for the initial metadata below.
                self.assume_client = session.create_client("sts")
                session_credentials = (
                    botocore.credentials.RefreshableCredentials.create_from_metadata(
                        metadata=self._sts_refresh(),
                        refresh_using=self._sts_refresh,
                        method="sts-assume-role",
                    )
                )
                session._credentials = session_credentials
            client_kwargs = {"endpoint_url": endpoint_url} if endpoint_url else None
            self.filesystem = s3fs.S3FileSystem(
                session=session, client_kwargs=client_kwargs
            )
        else:
            # Report the rejected name; the previous message interpolated the
            # builtin ``type`` and therefore always printed "<class 'type'>".
            raise ValueError(f"Unsupported FileReader type: {name}")

        # Expose the wrapped filesystem's bound methods directly on the
        # wrapper, skipping those overridden explicitly in this class.
        for method_name, method in inspect.getmembers(
            self.filesystem, predicate=inspect.ismethod
        ):
            if method_name not in (
                "__init__",
                "_rm",
                "cp_file",
                "created",
                "ls",
                "modified",
                "sign",
            ):
                setattr(self, method_name, method)

    def _sts_refresh(self):
        """Refresh tokens by calling assume_role again.

        Returns:
            A dict with ``access_key``, ``secret_key``, ``token`` and
            ``expiry_time`` (ISO formatted) taken from the STS response.
        """
        response = self.assume_client.assume_role(
            RoleArn=self.assume_role,
            RoleSessionName=f"data-toolz-filesystem-s3-{uuid4()}",
            DurationSeconds=3600,
        ).get("Credentials")
        return {
            "access_key": response.get("AccessKeyId"),
            "secret_key": response.get("SecretAccessKey"),
            "token": response.get("SessionToken"),
            "expiry_time": response.get("Expiration").isoformat(),
        }

    def _rm(self, path):
        """Remove ``path`` through the wrapped filesystem's ``rm``."""
        return self.filesystem.rm(path=path)

    def cp_file(self, path1, path2, **kwargs):
        """Copy ``path1`` to ``path2`` through the wrapped filesystem's ``copy``."""
        return self.filesystem.copy(path1=path1, path2=path2, **kwargs)

    def created(self, path):
        """Return the wrapped filesystem's ``created`` value for ``path``."""
        return self.filesystem.created(path=path)

    def ls(self, path, detail=True, **kwargs):
        """List ``path`` through the wrapped filesystem (``detail`` defaults to True)."""
        return self.filesystem.ls(path=path, detail=detail, **kwargs)

    def modified(self, path):
        """Return the wrapped filesystem's ``modified`` value for ``path``."""
        return self.filesystem.modified(path=path)

    def sign(self, path, expiration=100, **kwargs):
        """Delegate to the wrapped filesystem's ``sign`` (``expiration`` defaults to 100)."""
        return self.filesystem.sign(path=path, expiration=expiration, **kwargs)