Example #1
0
def test_strip_protocol_expanduser():
    """Check that ``_strip_protocol`` drops ``file://`` and expands ``~``."""
    if WIN:
        path = "file://~\\foo\\bar"
    else:
        path = "file://~/foo/bar"
    home = os.path.expanduser("~").replace("\\", "/")

    stripped = LocalFileSystem._strip_protocol(path)

    assert stripped != path
    assert "file://" not in stripped
    assert stripped.startswith(home)
    # A trailing slash must not survive stripping of a relative directory.
    assert not LocalFileSystem._strip_protocol("./").endswith("/")
Example #2
0
def test_linked_files_exists(tmpdir):
    """Compare ``exists`` and ``lexists`` on a symlink as target and link go away."""
    target = tmpdir / "original"
    link = tmpdir / "copy"

    fs = LocalFileSystem()
    fs.touch(target)

    try:
        os.symlink(target, link)
    except OSError:
        if not WIN:
            raise
        pytest.xfail("Ran on win without admin permissions")

    # Link and target are both present.
    assert fs.exists(link)
    assert fs.lexists(link)

    # Dangling link: the target is gone, the link itself remains.
    os.unlink(target)
    assert not fs.exists(link)
    assert fs.lexists(link)

    # The link is removed as well.
    os.unlink(link)
    assert not fs.exists(link)
    assert not fs.lexists(link)
Example #3
0
def test_isdir():
    """``isdir`` is true for each fixture's parent directory and false for files."""
    fs = LocalFileSystem()
    with filetexts(files, mode="b"):
        for name in files:
            parent = os.path.dirname(os.path.abspath(name))
            assert fs.isdir(parent)
            assert not fs.isdir(name)
        assert not fs.isdir("not-a-dir")
Example #4
0
def filesystem() -> AbstractFileSystem:
    """Return the filesystem selected by the process environment.

    A ``LocalFileSystem`` is returned unless both
    ``LIGHTNING_BUCKET_ENDPOINT_URL`` and ``LIGHTNING_BUCKET_NAME`` are set to
    non-empty values; in that case an ``S3FileSystem`` pointed at the endpoint
    is built and returned instead.

    Raises:
        RuntimeError: if the bucket is configured but no complete credential
            pair is found, if ``LIGHTNING_CLOUD_APP_ID`` is empty, or if the
            path from ``shared_storage_path()`` does not exist on the bucket.
    """
    local_fs = LocalFileSystem()

    endpoint_url = os.getenv("LIGHTNING_BUCKET_ENDPOINT_URL", "")
    bucket_name = os.getenv("LIGHTNING_BUCKET_NAME", "")
    if not (endpoint_url and bucket_name):
        return local_fs

    credential_vars = (
        ("LIGHTNING_AWS_ACCESS_KEY_ID", "LIGHTNING_AWS_SECRET_ACCESS_KEY"),
        # TODO: Remove when updated on the platform side.
        ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"),
    )
    # Take the first pair in which both the key and the secret are non-empty.
    for key_var, secret_var in credential_vars:
        key = os.getenv(key_var, "")
        secret = os.getenv(secret_var, "")
        if key and secret:
            break
    else:
        raise RuntimeError("missing S3 bucket credentials")

    s3_fs = S3FileSystem(
        key=key,
        secret=secret,
        use_ssl=False,
        client_kwargs={"endpoint_url": endpoint_url},
    )

    if not os.getenv("LIGHTNING_CLOUD_APP_ID", ""):
        raise RuntimeError("missing LIGHTNING_CLOUD_APP_ID")

    if not s3_fs.exists(shared_storage_path()):
        raise RuntimeError(
            f"shared filesystem {shared_storage_path()} does not exist")

    return s3_fs
Example #5
0
 def test_invalid_json(self):
     """A malformed JSON file yields an ``InvalidSchema`` whose reason names the file."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/bad_json.json')) as handle:
         result = from_file(handle, {})
         assert isinstance(result, InvalidSchema)
         message = f"File type not supported for file {from_root('/test/sample_data/bad_json.json')}.  Type: ASCII text, with no line terminators"
         assert message in result.reason
Example #6
0
 def test_valid_csv(self):
     """A CSV read with headers exposes the expected column names and types."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/csv_sample.csv')) as handle:
         result = from_file(handle, {"read_headers": True})
         assert isinstance(result, TextSchema)
         names = [column.name for column in result.columns]
         assert names == ["type", "price"]
         types = [column.type for column in result.columns]
         assert types == ["object", "float64"]
Example #7
0
 def test_csv_no_header(self):
     """A CSV read with default options gets positional column names 0 and 1."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/csv_no_header.csv')) as handle:
         result = from_file(handle)
         assert isinstance(result, TextSchema)
         names = [column.name for column in result.columns]
         assert names == [0, 1]
         types = [column.type for column in result.columns]
         assert types == ["object", "float64"]
Example #8
0
    def copy(self,
             lpath,
             rpath,
             recursive=False,
             callback=_DEFAULT_CALLBACK,
             **kwargs):
        """
        Copy the contents of a local source directory into the target directory.

        Unlike fsspec's put(), the source folder itself is not copied into the
        target directory when that target directory already exists.

        Each expanded local path is paired with a remote path and uploaded via
        ``self.fs.put_file``; ``callback`` is sized, wrapped and branched per pair,
        and ``kwargs`` are forwarded to every ``put_file`` call.
        """

        from fsspec.implementations.local import LocalFileSystem, make_path_posix
        from fsspec.utils import other_paths

        # Remote side: strip the protocol from a single path or from each path.
        if isinstance(rpath, str):
            rpath = self.fs._strip_protocol(rpath)
        else:
            rpath = [self.fs._strip_protocol(target) for target in rpath]

        # Local side: only plain strings are normalised to posix form.
        if isinstance(lpath, str):
            lpath = make_path_posix(lpath)

        sources = LocalFileSystem().expand_path(lpath, recursive=recursive)
        targets = other_paths(sources, rpath)

        callback.set_size(len(targets))
        for source, target in callback.wrap(zip(sources, targets)):
            callback.branch(source, target, kwargs)
            self.fs.put_file(source, target, **kwargs)
Example #9
0
    def __init__(
        self,
        path: str,
        filesystem: AbstractFileSystem = None,
        time_travel: datetime = None,
    ):
        """Initializes a Delta Lake table reader.

        Stores the table location and filesystem, applies the requested
        time-travel setting via ``_set_timestamp``, loads the checkpoint
        information via ``_get_checkpoint_info`` and starts with an empty
        file set.

        Args:
            path: the path to the table on the filesystem
            filesystem: python-like filesystem (If unset, assume local)
            time_travel: set the delta lake to a specific version
        """
        if not filesystem:
            # LocalFileSystem's first positional parameter is ``auto_mkdir``,
            # not a location, so the table path must not be passed to it.
            self.filesystem = LocalFileSystem()
        else:
            self.filesystem = filesystem
        self.path = path
        self._set_timestamp(time_travel)
        self.checkpoint_info = self._get_checkpoint_info()
        self.fileset = set()
Example #10
0
def write_bids(
    to: PathLike,
    participants: DataFrame,
    sessions: DataFrame,
    scans: DataFrame,
) -> List[PathLike]:
    """Write the BIDS metadata tables, then import the imaging data.

    ``participants.tsv`` and one ``<participant_id>_sessions.tsv`` per
    participant are written below ``to`` inside a single filesystem
    transaction. Afterwards every row of ``scans`` is handed to
    ``convert_dicom`` when its ``format`` is ``"DCM"`` and to
    ``install_nifti`` otherwise.

    Returns:
        The index of ``scans`` as a list.
    """
    from pathlib import Path

    from fsspec.implementations.local import LocalFileSystem

    bids_root = Path(to)
    local_fs = LocalFileSystem(auto_mkdir=True)

    # Ensure BIDS hierarchy is written first.
    with local_fs.transaction:
        with local_fs.open(bids_root / "participants.tsv", "w") as handle:
            write_to_tsv(participants, handle)

        for participant_id, group in sessions.groupby("participant_id"):
            flattened = group.droplevel("participant_id")
            sessions_path = (bids_root / participant_id /
                             f"{participant_id}_sessions.tsv")
            with local_fs.open(sessions_path, "w") as handle:
                write_to_tsv(flattened, handle)

    # Perform import of imaging data next.
    for filename, metadata in scans.iterrows():
        importer = convert_dicom if metadata.format == "DCM" else install_nifti
        importer(sourcedata_dir=metadata.source_dir,
                 bids_filename=bids_root / filename)

    return scans.index.to_list()
Example #11
0
 def test_file_not_supported(self):
     """An unsupported file type yields an ``InvalidSchema`` with the expected reason prefix."""
     logger.set_level("error")
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/unsupported_file_type.usf')) as handle:
         result = from_file(handle)
         assert isinstance(result, InvalidSchema)
         expected_prefix = "File type not supported for file"
         # Only the leading 32 characters (the length of the prefix) are compared.
         assert result.reason[:32] == expected_prefix
Example #12
0
 def test_complex_json(self):
     """A nested JSON document produces the expected nested JSON schema."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/complex_json.json')) as handle:
         result = from_file(handle)
         assert isinstance(result, JsonSchema)
         expected = {
             '$schema': 'http://json-schema.org/schema#',
             'type': 'object',
             'properties': {
                 'data': {
                     'type': 'array',
                     'items': {
                         'type': 'object',
                         'properties': {
                             'field1': {'type': 'string'},
                             'field2': {'type': ['integer', 'string']},
                             'field3': {'type': 'string'},
                             'field4': {'type': 'string'},
                             'field5': {
                                 'type': 'object',
                                 'properties': {'some_other_stuff': {'type': 'string'}},
                                 'required': ['some_other_stuff'],
                             },
                         },
                     },
                 },
             },
             'required': ['data'],
         }
         assert result.schema == expected
Example #13
0
 def test_jsonl(self):
     """A JSON-lines file produces a flat object schema of string fields."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/json_lines.jsonl')) as handle:
         result = from_file(handle)
         assert isinstance(result, JsonSchema)
         expected = {
             '$schema': 'http://json-schema.org/schema#',
             'properties': {
                 'field': {'type': 'string'},
                 'field2': {'type': 'string'},
                 'field3': {'type': 'string'},
                 'field4': {'type': 'string'},
                 'field5': {'type': 'string'},
                 'field6': {'type': 'string'},
                 'field7': {'type': 'string'},
             },
             'type': 'object',
         }
         assert result.schema == expected
Example #14
0
def test_delete_cwd(tmpdir):
    """Recursively removing ``"."`` must raise ``ValueError``."""
    local_fs = LocalFileSystem()
    original_cwd = os.getcwd()
    try:
        os.chdir(tmpdir)
        with pytest.raises(ValueError):
            local_fs.rm(".", recursive=True)
    finally:
        # Restore the working directory whatever the outcome.
        os.chdir(original_cwd)
Example #15
0
def test_directories(tmpdir):
    """Create, list and remove a directory, then list the root marker."""
    base = make_path_posix(str(tmpdir))
    subdir = base + "/dir"
    fs = LocalFileSystem()

    fs.mkdir(subdir)
    assert subdir in fs.ls(base)
    first_entry = fs.ls(base, True)[0]
    assert first_entry["type"] == "directory"

    fs.rmdir(subdir)
    assert not fs.ls(base)
    assert fs.ls(fs.root_marker)
Example #16
0
def install_nifti(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Copy the first file listed in ``sourcedata_dir`` to ``bids_filename``, gzip-compressed.

    Args:
        sourcedata_dir: directory whose first ``ls`` entry is used as the source.
        bids_filename: destination path; it is opened with ``compression="gzip"``
            on a filesystem created with ``auto_mkdir=True``.

    Raises:
        IndexError: if ``sourcedata_dir`` has no entries.
    """
    import shutil

    from fsspec.implementations.local import LocalFileSystem

    fs = LocalFileSystem(auto_mkdir=True)
    source_path = fs.ls(sourcedata_dir)[0]

    # Open both handles inside one ``with`` so the source is closed even if
    # opening the target fails, and stream the data in chunks instead of
    # loading the whole image into memory.
    with fs.open(source_path, mode="rb") as sf, \
            fs.open(bids_filename, mode="wb", compression="gzip") as tf:
        shutil.copyfileobj(sf, tf)
Example #17
0
def test_get_pyarrow_filesystem():
    """``LocalFileSystem`` is a ``pyarrow.filesystem.FileSystem``; an arbitrary class is not."""
    pa = pytest.importorskip("pyarrow")

    local_fs = LocalFileSystem()
    assert isinstance(local_fs, pa.filesystem.FileSystem)
    # The instance acts as its own pyarrow filesystem.
    assert local_fs._get_pyarrow_filesystem() is local_fs

    class UnknownFileSystem:
        pass

    unrelated = UnknownFileSystem()
    assert not isinstance(unrelated, pa.filesystem.FileSystem)
Example #18
0
def test_infer_compression(tmpdir, opener, ext):
    """Data written through ``opener`` is read back with ``compression="infer"``."""
    payload = b"hello world"
    target = str(tmpdir / f"test{ext}")
    with opener(target, "wb") as writer:
        writer.write(payload)

    local_fs = LocalFileSystem()
    with local_fs.open(f"file://{target}", "rb", compression="infer") as reader:
        roundtripped = reader.read()

    assert payload == roundtripped
Example #19
0
def test_directories(tmpdir):
    """Create a directory, see it listed as a directory, then remove it."""
    base = str(tmpdir)
    subdir = base + '/dir'
    fs = LocalFileSystem()

    fs.mkdir(subdir)
    assert subdir in fs.ls(base)
    first_entry = fs.ls(base, True)[0]
    assert first_entry['type'] == 'directory'

    fs.rmdir(subdir)
    assert not fs.ls(base)
Example #20
0
def test_directories(tmpdir):
    """Create a directory, see it listed as a directory, then remove it."""
    base = str(tmpdir)
    subdir = base + "/dir"
    fs = LocalFileSystem()

    fs.mkdir(subdir)
    assert subdir in fs.ls(base)
    first_entry = fs.ls(base, True)[0]
    assert first_entry["type"] == "directory"

    fs.rmdir(subdir)
    assert not fs.ls(base)
Example #21
0
 def test_valid_json(self):
     """A simple JSON file yields the expected schema and empty column views."""
     local_fs = LocalFileSystem()
     with local_fs.open(from_root('/test/sample_data/json_simple.json')) as handle:
         result = from_file(handle)
         assert isinstance(result, JsonSchema)
         expected = {
             '$schema': 'http://json-schema.org/schema#',
             'properties': {
                 'field': {'type': 'string'},
                 'field2': {'type': 'string'},
                 'field3': {'type': 'string'},
             },
             'required': ['field', 'field2', 'field3'],
             'type': 'object',
         }
         assert result.schema == expected
         assert result.to_dict() == {'Columns': [], 'SchemaType': 'json'}
         assert result.to_pd_dict() == {}
Example #22
0
def test_seekable(tmpdir):
    """A local file opened in text mode supports ``seekable``, ``seek`` and ``tell``."""
    fs = LocalFileSystem()
    tmpdir = str(tmpdir)
    fn0 = os.path.join(tmpdir, "target")

    with open(fn0, "wb") as f:
        f.write(b"data")

    # Use a context manager so the handle is closed even when an assertion
    # fails; the original left the file open.
    with fs.open(fn0, "rt") as f:
        assert f.seekable(), "file is not seekable"
        f.seek(1)
        assert f.read(1) == "a"
        assert f.tell() == 2
Example #23
0
def remove_local_file(filepath: Union[str, Path]) -> None:
    """
    Remove a path from the local file system and log the removal.

    Parameters
    ----------
    filepath: Union[str, Path]
        The filepath of the local file to delete.
    """
    LocalFileSystem().rm(filepath)
    log.info(f"Removed {filepath} from local file system.")
Example #24
0
def test_transaction(tmpdir):
    """Content written inside a transaction is readable once the transaction ends."""
    target = str(tmpdir / "test.txt")
    expected = "hello world"
    local_fs = LocalFileSystem()

    with local_fs.transaction:
        with local_fs.open(target, "w") as writer:
            writer.write(expected)

    with local_fs.open(target, "r") as reader:
        actual = reader.read()

    assert expected == actual
Example #25
0
def test_get_pyarrow_filesystem():
    """``LocalFileSystem`` is a pyarrow ``FileSystem`` only for pyarrow versions below 2.0."""
    pa = pytest.importorskip("pyarrow")

    local_fs = LocalFileSystem()
    older_than_2 = LooseVersion(pa.__version__) < LooseVersion("2.0")
    if older_than_2:
        assert isinstance(local_fs, pa.filesystem.FileSystem)
        assert local_fs._get_pyarrow_filesystem() is local_fs
    else:
        assert not isinstance(local_fs, pa.filesystem.FileSystem)

    class UnknownFileSystem:
        pass

    unrelated = UnknownFileSystem()
    assert not isinstance(unrelated, pa.filesystem.FileSystem)
Example #26
0
    def test_csv_equality(self):
        """Two CSVs with different column sets are reported as a ``SchemaConflict``."""
        local_fs = LocalFileSystem()
        options = {"read_headers": True}

        with local_fs.open(from_root('/test/sample_data/csv_sample.csv')) as handle:
            first = from_file(handle, options)
            assert isinstance(first, TextSchema)

        with local_fs.open(from_root('/test/sample_data/csv_sample_2.csv')) as handle:
            second = from_file(handle, options)
            assert isinstance(second, TextSchema)

        conflict = find_conflicts([first, second])[0]
        assert isinstance(conflict, SchemaConflict)
        expected = {
            'CountDistinctSchemas': 2,
            'DistinctSchemas': [
                {
                    'SchemaType': 'csv',
                    'Columns': [
                        {'Name': 'type', 'Type': 'object'},
                        {'Name': 'price', 'Type': 'float64'},
                    ],
                },
                {
                    'SchemaType': 'csv',
                    'Columns': [
                        {'Name': 'type', 'Type': 'object'},
                        {'Name': 'price', 'Type': 'float64'},
                        {'Name': 'availabile', 'Type': 'bool'},
                        {'Name': 'date', 'Type': 'object'},
                    ],
                },
            ],
            'NonOverlappingColumns': [
                {'name': 'availabile', 'type': 'bool'},
                {'name': 'date', 'type': 'object'},
            ],
        }
        assert conflict.to_dict() == {'SchemaConflicts': expected}
Example #27
0
def test_abs_paths(tmpdir):
    """``glob`` with a relative pattern returns a path that still opens after leaving the directory."""
    tmpdir = str(tmpdir)
    here = os.getcwd()
    os.chdir(tmpdir)
    try:
        with open("tmp", "w") as f:
            f.write("hi")
        out = LocalFileSystem().glob("*")
        assert len(out) == 1
        assert "/" in out[0]
        assert "tmp" in out[0]

        fs = LocalFileSystem()
    finally:
        # Always restore the working directory, even when an assertion above
        # fails, so that later tests do not run from inside tmpdir.
        os.chdir(here)

    # The file is opened after leaving tmpdir, so out[0] must not be
    # relative to it.
    with fs.open(out[0], "r") as f:
        res = f.read()
    assert res == "hi"
Example #28
0
def get_filesystem(path: Union[str, Path]) -> AbstractFileSystem:
    """Return the filesystem implied by ``path``.

    A path containing ``"://"`` uses ``fsspec.filesystem`` with the text before
    the first ``":"`` as protocol; any other path uses a ``LocalFileSystem``.
    """
    location = str(path)
    if "://" not in location:
        # use local filesystem
        return LocalFileSystem()
    # use the filesystem from the protocol specified
    protocol = location.partition(":")[0]
    return fsspec.filesystem(protocol)
Example #29
0
def test_prepare_args(sag, sge, tmp_path):
    """Test preparing arguments for getting ABI and GLM data."""
    from sattools.scutil import prepare_abi_glm_ms_args
    from fsspec.implementations.local import LocalFileSystem
    from typhon.files.handlers.common import FileInfo
    from satpy.readers import FSFile

    # Five fake GLM entries, each covering one minute.
    glm_infos = []
    for i in range(5):
        begin = datetime.datetime(1900, 1, 1, 0, i)
        finish = datetime.datetime(1900, 1, 1, 0, i + 1)
        glm_infos.append(
            FileInfo(path=str(tmp_path / f"glm{i:d}"),
                     times=[begin, finish],
                     attr={}))
    sge.return_value = glm_infos

    # Five fake ABI files on the local filesystem.
    abi_files = []
    for i in range(5):
        abi_files.append(FSFile(tmp_path / f"abi{i:d}", LocalFileSystem()))
    sag.return_value = abi_files

    start = datetime.datetime(1900, 1, 1, 0)
    end = datetime.datetime(1900, 1, 1, 6)

    # Unpacking requires the call to return exactly two values.
    glm_fsfiles, abi_fsfiles = prepare_abi_glm_ms_args(start,
                                                       end,
                                                       chans={8, 10},
                                                       sector="F")
    assert sag.call_args[1]["sector"] == "F"
    assert sge.call_args[1]["sector"] == "F"

    with pytest.raises(ValueError):
        prepare_abi_glm_ms_args(start, end, chans={8, 10}, sector="M1")
Example #30
0
def test_equality():
    """Test sane behaviour for equality and hashing.

    Make sure that different CachingFileSystem only test equal to each other
    when they should, and do not test equal to the filesystem they rely upon.
    Similarly, make sure their hashes differ when they should and are equal
    when they should.

    Related: GitHub#577, GitHub#578
    """
    from fsspec.implementations.local import LocalFileSystem

    lfs = LocalFileSystem()
    cfs1 = CachingFileSystem(fs=lfs, cache_storage="raspberry")
    cfs2 = CachingFileSystem(fs=lfs, cache_storage="banana")
    cfs3 = CachingFileSystem(fs=lfs, cache_storage="banana")

    # Equality: only instances sharing the same cache storage compare equal.
    assert cfs1 == cfs1
    assert cfs1 != cfs2
    assert cfs1 != cfs3
    assert cfs2 == cfs3

    # A caching filesystem never equals the filesystem it wraps.
    assert cfs1 != lfs
    assert cfs2 != lfs
    assert cfs3 != lfs

    # Hashing mirrors the equality checks above.
    assert hash(lfs) != hash(cfs1)
    assert hash(lfs) != hash(cfs2)
    assert hash(lfs) != hash(cfs3)
    assert hash(cfs1) != hash(cfs2)
    # The original repeated the cfs1/cfs2 check here and never covered cfs3.
    assert hash(cfs1) != hash(cfs3)
    assert hash(cfs2) == hash(cfs3)