Example #1
0
def test_file_ops(tmpdir):
    """Run touch, ls, write, ukey, copy, move and rm against a temp directory."""
    root = str(tmpdir)
    fs = LocalFileSystem()

    first = root + "/afile"
    copied = root + "/afile2"
    moved = root + "/afile3"

    # Looking up a path that was never created must fail.
    with pytest.raises(FileNotFoundError):
        fs.info(root + "/nofile")

    fs.touch(first)
    key_before = fs.ukey(first)
    assert first in fs.ls(root)

    with fs.open(first, "wb") as handle:
        handle.write(b"data")
    key_after = fs.ukey(first)
    # The key is expected to differ once the file content has changed.
    assert key_before != key_after

    fs.copy(first, copied)
    assert copied in fs.ls(root)

    fs.move(first, moved)
    assert not fs.exists(first)

    fs.rm(moved, recursive=True)
    assert not fs.exists(moved)

    # Finally remove the whole directory tree.
    fs.rm(root, recursive=True)
    assert not fs.exists(root)
Example #2
0
def test_file_ops(tmpdir):
    """Check basic file operations of ``LocalFileSystem`` in a temp directory."""
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()

    def child(name):
        # Build the same '<tmpdir>/<name>' strings the filesystem reports.
        return tmpdir + '/' + name

    with pytest.raises(FileNotFoundError):
        fs.info(child('nofile'))

    source = child('afile')
    fs.touch(source)
    ukey_empty = fs.ukey(source)

    assert source in fs.ls(tmpdir)

    with fs.open(source, 'wb') as out:
        out.write(b'data')
    ukey_written = fs.ukey(source)
    assert ukey_empty != ukey_written  # because file changed

    duplicate = child('afile2')
    fs.copy(source, duplicate)
    assert duplicate in fs.ls(tmpdir)

    renamed = child('afile3')
    fs.move(source, renamed)
    assert not fs.exists(source)

    fs.rm(renamed, recursive=True)
    assert not fs.exists(renamed)

    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
Example #3
0
def test_linked_files_exists(tmpdir):
    """Compare ``exists`` and ``lexists`` for a symlink before and after it dangles."""
    fs = LocalFileSystem()

    origin = tmpdir / "original"
    copy_file = tmpdir / "copy"
    fs.touch(origin)

    try:
        os.symlink(origin, copy_file)
    except OSError:
        # Only tolerate the failure on Windows; anywhere else it is a real error.
        if not WIN:
            raise
        pytest.xfail("Ran on win without admin permissions")

    # Link and target both present.
    assert fs.exists(copy_file)
    assert fs.lexists(copy_file)

    # Target removed: the link itself is still there but no longer resolves.
    os.unlink(origin)
    assert not fs.exists(copy_file)
    assert fs.lexists(copy_file)

    # Link removed as well: nothing is left.
    os.unlink(copy_file)
    assert not fs.exists(copy_file)
    assert not fs.lexists(copy_file)
Example #4
0
def filesystem() -> AbstractFileSystem:
    """Return the filesystem to use, chosen from environment variables.

    A ``LocalFileSystem`` is returned unless both
    ``LIGHTNING_BUCKET_ENDPOINT_URL`` and ``LIGHTNING_BUCKET_NAME`` are set to
    non-empty values, in which case an ``S3FileSystem`` pointing at that
    endpoint is built and returned.

    Raises:
        RuntimeError: in the S3 case, if no credentials are found, if
            ``LIGHTNING_CLOUD_APP_ID`` is unset or empty, or if the path from
            ``shared_storage_path()`` does not exist on the S3 filesystem.
    """
    local_fs = LocalFileSystem()

    endpoint_url = os.getenv("LIGHTNING_BUCKET_ENDPOINT_URL", "")
    bucket_name = os.getenv("LIGHTNING_BUCKET_NAME", "")
    if not (endpoint_url and bucket_name):
        return local_fs

    key = os.getenv("LIGHTNING_AWS_ACCESS_KEY_ID", "")
    secret = os.getenv("LIGHTNING_AWS_SECRET_ACCESS_KEY", "")
    # TODO: Remove when updated on the platform side.
    if not (key and secret):
        # Fall back to the standard AWS variable names.
        key = os.getenv("AWS_ACCESS_KEY_ID", "")
        secret = os.getenv("AWS_SECRET_ACCESS_KEY", "")
    if not (key and secret):
        raise RuntimeError("missing S3 bucket credentials")

    s3_fs = S3FileSystem(
        key=key,
        secret=secret,
        use_ssl=False,
        client_kwargs={"endpoint_url": endpoint_url},
    )

    # The app id is only validated here; its value is not used further.
    if os.getenv("LIGHTNING_CLOUD_APP_ID", "") == "":
        raise RuntimeError("missing LIGHTNING_CLOUD_APP_ID")

    if s3_fs.exists(shared_storage_path()):
        return s3_fs

    raise RuntimeError(
        f"shared filesystem {shared_storage_path()} does not exist")
def test_commit_discard(tmpdir):
    """Check that a transaction commits on normal exit and discards on error."""
    base = str(tmpdir)
    fs = LocalFileSystem()

    committed = base + "/afile"
    discarded = base + "/bfile"

    with fs.transaction:
        with fs.open(committed, "wb") as handle:
            # Nothing is visible at the target while the file is being written.
            assert not fs.exists(committed)
            handle.write(b"data")
        # Still not visible after closing, because the transaction is open.
        assert not fs.exists(committed)
    # Leaving the transaction makes the written data available.
    assert fs.cat(committed) == b"data"

    try:
        with fs.transaction:
            with fs.open(discarded, "wb") as handle:
                handle.write(b"data")
            raise KeyboardInterrupt
    except KeyboardInterrupt:
        # The interrupted transaction must not leave the file behind.
        assert not fs.exists(discarded)
Example #6
0
def test_commit_discard(tmpdir):
    """Verify transactional writes: committed on success, dropped on interrupt."""
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    kept_path = tmpdir + '/afile'
    dropped_path = tmpdir + '/bfile'

    # Successful transaction: file appears only once the transaction ends.
    with fs.transaction:
        with fs.open(kept_path, 'wb') as writer:
            assert not fs.exists(kept_path)
            writer.write(b'data')
        assert not fs.exists(kept_path)
    assert fs.cat(kept_path) == b'data'

    # Interrupted transaction: the pending file must never appear.
    try:
        with fs.transaction:
            with fs.open(dropped_path, 'wb') as writer:
                writer.write(b'data')
            raise KeyboardInterrupt
    except KeyboardInterrupt:
        assert not fs.exists(dropped_path)
Example #7
0
def convert_dicom(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Convert a DICOM directory to NIfTI by running the ``dcm2niix`` executable.

    The parent directory of ``bids_filename`` is used as the output directory.
    It is deleted recursively if it already exists and then recreated, so the
    conversion always writes into an empty directory.

    Args:
        sourcedata_dir: Path handed to ``dcm2niix`` as its input argument.
        bids_filename: Target file path. Its file name with ``".nii.gz"``
            removed is passed as the output name format (``-f``), and its
            parent directory as the output directory (``-o``).

    Raises:
        FileNotFoundError: If the ``dcm2niix`` executable cannot be found.

    Note:
        The exit status of ``dcm2niix`` is not checked, so a failed conversion
        does not raise.
    """
    import subprocess
    from pathlib import Path

    from fsspec.implementations.local import LocalFileSystem

    target = Path(bids_filename)
    output_fmt = target.name.replace(".nii.gz", "")
    output_dir = str(target.parent)

    # Ensure output directory is empty.
    fs = LocalFileSystem()
    if fs.exists(output_dir):
        fs.rm(output_dir, recursive=True)
    fs.makedirs(output_dir)

    # Run conversion with dcm2niix with anonymization and maximum compression.
    # The command is passed as an argument list (no shell) so that paths with
    # spaces or shell metacharacters reach dcm2niix as single, literal
    # arguments instead of being split or interpreted by a shell.
    subprocess.run(
        [
            "dcm2niix",
            "-9",
            "-b", "y",
            "-ba", "y",
            "-f", output_fmt,
            "-o", output_dir,
            "-z", "i",
            str(sourcedata_dir),
        ],
    )
Example #8
0
def test_file_ops(tmpdir):
    """Exercise file operations of ``LocalFileSystem(auto_mkdir=True)``.

    Covers info on a missing path, touch, ls, write, ukey, copy, move, copying
    into a not-yet-existing nested directory, and the single-path and list
    forms of removal.
    """
    root = make_path_posix(str(tmpdir))
    fs = LocalFileSystem(auto_mkdir=True)

    original = root + "/afile"
    duplicate = root + "/afile2"
    relocated = root + "/afile3"
    nested = root + "/deeply/nested/file"
    single = root + "/afile6"

    with pytest.raises(FileNotFoundError):
        fs.info(root + "/nofile")

    fs.touch(original)
    key_initial = fs.ukey(original)
    assert original in fs.ls(root)

    with fs.open(original, "wb") as sink:
        sink.write(b"data")
    key_modified = fs.ukey(original)
    assert key_initial != key_modified  # because file changed

    fs.copy(original, duplicate)
    assert duplicate in fs.ls(root)

    fs.move(original, relocated)
    assert not fs.exists(original)

    # The destination's parent directories do not exist before this copy.
    fs.cp(relocated, nested)
    assert fs.exists(nested)

    fs.rm(relocated, recursive=True)
    assert not fs.exists(relocated)

    files = [root + "/afile4", root + "/afile5"]
    for path in files:
        fs.touch(path)

    # rm_file takes one path; a list is only accepted by rm.
    with pytest.raises(TypeError):
        fs.rm_file(files)
    fs.rm(files)
    assert not any(fs.exists(path) for path in files)

    fs.touch(single)
    fs.rm_file(single)
    assert not fs.exists(single)

    # IsADirectoryError raised on Linux, PermissionError on Windows
    with pytest.raises((IsADirectoryError, PermissionError)):
        fs.rm_file(root)

    fs.rm(root, recursive=True)
    assert not fs.exists(root)
Example #9
0
File: local.py Project: kcak11/dvc
class LocalFileSystem(BaseFileSystem):
    """Filesystem implementation for paths on the local machine.

    Most operations delegate to ``os``/``os.path``, to the ``System`` helper,
    or to module-level utility functions (``remove``, ``makedirs``, ``move``,
    ``copyfile``, ...). Existence checks go through an fsspec local
    filesystem instance stored on ``self.fs``.
    """

    scheme = Schemes.LOCAL
    PATH_CLS = PathInfo
    PARAM_CHECKSUM = "md5"
    PARAM_PATH = "path"
    TRAVERSE_PREFIX_LEN = 2

    def __init__(self, **config):
        """Initialise the base class with ``config`` and create ``self.fs``."""
        # Imported here and aliased because this class has the same name.
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(**config)
        self.fs = LocalFS()

    @staticmethod
    def open(path_info, mode="r", encoding=None, **kwargs):
        """Open ``path_info`` with the builtin ``open``; extra kwargs are ignored."""
        return open(path_info, mode=mode, encoding=encoding)

    def exists(self, path_info) -> bool:
        """Return whether ``path_info`` exists according to ``self.fs``."""
        assert isinstance(path_info, str) or path_info.scheme == "local"
        return self.fs.exists(path_info)

    def isfile(self, path_info) -> bool:
        """Return ``os.path.isfile(path_info)``."""
        return os.path.isfile(path_info)

    def isdir(self, path_info) -> bool:
        """Return ``os.path.isdir(path_info)``."""
        return os.path.isdir(path_info)

    def iscopy(self, path_info):
        """Return True when the path is neither a symlink nor a hardlink."""
        if System.is_symlink(path_info):
            return False
        return not System.is_hardlink(path_info)

    def walk(self, top, topdown=True, onerror=None, **kwargs):
        """Directory fs generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks

        Each yielded root is passed through ``os.path.normpath``.
        """
        entries = os.walk(top, topdown=topdown, onerror=onerror)
        for root, dirs, files in entries:
            yield os.path.normpath(root), dirs, files

    def walk_files(self, path_info, **kwargs):
        """Yield a ``PathInfo`` for every file found by ``walk(path_info)``."""
        for root, _, files in self.walk(path_info):
            # NOTE: os.path.join is ~5.5 times slower
            yield from (PathInfo(f"{root}{os.sep}{file}") for file in files)

    def is_empty(self, path_info):
        """Return True for a zero-size file or a directory with no entries."""
        if self.isfile(path_info) and os.path.getsize(path_info) == 0:
            return True

        return self.isdir(path_info) and not os.listdir(path_info)

    def remove(self, path_info):
        """Delete ``path_info`` via the module-level ``remove`` helper.

        Raises:
            NotImplementedError: if a ``PathInfo`` with a non-local scheme
                is given.
        """
        if isinstance(path_info, PathInfo) and path_info.scheme != "local":
            raise NotImplementedError
        remove(path_info)

    def makedirs(self, path_info):
        """Create ``path_info`` with ``exist_ok=True``."""
        makedirs(path_info, exist_ok=True)

    def isexec(self, path_info):
        """Return ``is_exec`` applied to the path's ``st_mode``."""
        return is_exec(self.stat(path_info).st_mode)

    def stat(self, path):
        """Return ``os.stat(path)``."""
        return os.stat(path)

    def move(self, from_info, to_info):
        """Move ``from_info`` to ``to_info``, creating the target's parent first.

        Raises:
            NotImplementedError: if either path has a non-local scheme.
        """
        both_local = from_info.scheme == "local" and to_info.scheme == "local"
        if not both_local:
            raise NotImplementedError

        self.makedirs(to_info.parent)
        move(from_info, to_info)

    def copy(self, from_info, to_info):
        """Copy to a temporary sibling of ``to_info``, then rename it into place.

        On any exception the temporary path is removed and the error re-raised.
        """
        staging = to_info.parent / tmp_fname("")
        try:
            copyfile(from_info, staging)
            os.rename(staging, to_info)
        except Exception:
            self.remove(staging)
            raise

    def _upload_fobj(self, fobj, to_info):
        """Write ``fobj`` to a temporary sibling of ``to_info`` and rename it.

        The parent directory is created first. On any exception the temporary
        path is removed and the error re-raised.
        """
        parent = to_info.parent
        self.makedirs(parent)
        staging = parent / tmp_fname("")
        try:
            copy_fobj_to_file(fobj, staging)
            os.rename(staging, to_info)
        except Exception:
            self.remove(staging)
            raise

    @staticmethod
    def symlink(from_info, to_info):
        """Delegate to ``System.symlink``."""
        System.symlink(from_info, to_info)

    @staticmethod
    def is_symlink(path_info):
        """Delegate to ``System.is_symlink``."""
        return System.is_symlink(path_info)

    def hardlink(self, from_info, to_info):
        """Hardlink ``from_info`` to ``to_info``; empty sources get a new empty file."""
        # If there are a lot of empty files (which happens a lot in datasets),
        # and the cache type is `hardlink`, we might reach link limits and
        # will get something like: `too many links error`
        #
        # This is because all those empty files will have the same hash
        # (i.e. 68b329da9893e34099c7d8ad5cb9c940), therefore, they will be
        # linked to the same file in the cache.
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        #   * ext4 limits the number of hard links on a file to 65,000
        #   * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # That's why we simply create an empty file rather than a link.
        # NOTE(review): getsize is not defined in this class; presumably it is
        # inherited from BaseFileSystem - confirm.
        if self.getsize(from_info) != 0:
            System.hardlink(from_info, to_info)
            return

        self.open(to_info, "w").close()
        logger.debug("Created empty file: %s -> %s", from_info, to_info)

    @staticmethod
    def is_hardlink(path_info):
        """Delegate to ``System.is_hardlink``."""
        return System.is_hardlink(path_info)

    def reflink(self, from_info, to_info):
        """Delegate to ``System.reflink``."""
        System.reflink(from_info, to_info)

    @staticmethod
    def info(path_info):
        """Return a dict with the path's ``size`` and ``type`` ("dir" or "file")."""
        st = os.stat(path_info)
        kind = "dir" if stat.S_ISDIR(st.st_mode) else "file"
        return {
            "size": st.st_size,
            "type": kind,
        }

    def _upload(self,
                from_file,
                to_info,
                name=None,
                no_progress_bar=False,
                **_kwargs):
        """Copy ``from_file`` to a temporary file, then replace ``to_info`` with it.

        The parent directory of ``to_info`` is created first. ``name`` and
        ``no_progress_bar`` are forwarded to ``copyfile``.
        """
        makedirs(to_info.parent, exist_ok=True)

        staging_file = tmp_fname(to_info)
        copyfile(
            from_file,
            staging_file,
            name=name,
            no_progress_bar=no_progress_bar,
        )
        os.replace(staging_file, to_info)

    @staticmethod
    def _download(from_info,
                  to_file,
                  name=None,
                  no_progress_bar=False,
                  **_kwargs):
        """Copy ``from_info`` to ``to_file`` using ``copyfile``."""
        copyfile(
            from_info,
            to_file,
            no_progress_bar=no_progress_bar,
            name=name,
        )
Example #10
0
 def __exit__(self, *_):
     """Delete ``self.tmp_path`` recursively if it exists when the context ends.

     Returns None, so an exception raised inside the context is not suppressed.
     """
     local_fs = LocalFileSystem()
     leftover = self.tmp_path
     if not local_fs.exists(leftover):
         return
     local_fs.rm(leftover, recursive=True)
Example #11
0
def test_info_path_like(tmpdir):
    """Check that ``exists`` accepts a ``pathlib.Path`` argument."""
    fs = LocalFileSystem()

    target = Path(tmpdir / "test_info")
    target.write_text("fsspec")

    assert fs.exists(target)