Ejemplo n.º 1
0
def test_chained_url(ftp_writable):
    """Upload a small zip archive over FTP and read a member back via fsspec URLs.

    NOTE(review): this snippet is damaged (see the note above ``urls``) and
    does not parse as written.
    """
    # NOTE(review): the ``ftp_writable`` fixture is not unpacked here and the
    # host literal looks redacted -- confirm against the upstream test.
    host, port, username, password = "******", 2121, "user", "pass"

    @contextmanager
    def tempzip(data={}):
        """Yield the path of a temporary zip file holding ``data`` as members."""
        # mkstemp returns (fd, path); only the path is kept here.
        f = tempfile.mkstemp(suffix="zip")[1]
        with zipfile.ZipFile(f, mode="w") as z:
            for k, v in data.items():
                z.writestr(k, v)
        try:
            yield f
        finally:
            # Best-effort cleanup: a failure to delete the file is ignored.
            try:
                os.remove(f)
            except (IOError, OSError):
                pass

    data = {"afile": b"hello"}

    cls = fsspec.get_filesystem_class("ftp")
    fs = cls(host=host, port=port, username=username, password=password)
    # Build the archive locally, then push it to the FTP server.
    with tempzip(data) as lfile:
        fs.put_file(lfile, "archive.zip")

    # NOTE(review): the list below is never closed and the loop that binds
    # ``url`` is missing; text appears to have been lost (presumably during
    # credential redaction). Restore from the original test before running.
    urls = [
        "zip://afile",
        "zip://*****:*****@{host}:{port}/archive.zip"
        with fsspec.open(url, "rb") as f:
            assert f.read() == data["afile"]
Ejemplo n.º 2
0
def get_ecoregions(tempdir):
    """Download the ecoregion GeoJSON files into ``<tempdir>/raw/``.

    Fetches the "baileys" and "supersections" files over HTTP, one after
    the other, using the fsspec "http" filesystem.
    """
    http_fs = fsspec.get_filesystem_class("http")()
    for kind in ["baileys", "supersections"]:
        local_path = os.path.join(tempdir, f"raw/{kind}.geojson")
        remote_url = f"https://storage.googleapis.com/carbonplan-data/raw/ecoregions/{kind}.geojson"
        http_fs.download(remote_url, local_path)
def download(source_url: str, cache_location: str) -> str:
    """
    Download a remote file to a cache.

    The cached file name is the SHA-256 hex digest of ``source_url``. The
    builtin ``hash()`` is randomised per interpreter process for strings,
    so it cannot serve as a cache key that survives across runs.

    Parameters
    ----------
    source_url : str
        Path or url to the source file.
    cache_location : str
        Path or url to the target location for the source file. The text
        before the first ``:`` selects the fsspec filesystem class.

    Returns
    -------
    target_url : str
        Path or url in the form of `{cache_location}/{sha256(source_url)}`.
    """
    import hashlib
    import shutil

    fs = fsspec.get_filesystem_class(
        cache_location.split(':')[0])(token='cloud')

    digest = hashlib.sha256(source_url.encode("utf-8")).hexdigest()
    target_url = os.path.join(cache_location, digest)

    # Probe for a cached copy without opening (and leaking) a file handle.
    if fs.exists(target_url):
        return target_url

    # Stream the copy in chunks instead of holding the whole file in memory.
    with fsspec.open(source_url, mode="rb") as source:
        with fs.open(target_url, mode="wb") as target:
            shutil.copyfileobj(source, target)
    return target_url
Ejemplo n.º 4
0
def test_combine_and_write():
    """Check ``combine_and_write`` both inside a Flow and when run directly.

    The "rasm" tutorial dataset is split in two along ``time``, each half is
    stored as netCDF bytes in the in-memory filesystem, and the task must
    recombine them into a Zarr store equal to the original dataset.
    """
    full = xr.tutorial.open_dataset("rasm").load()
    halves = full.isel(time=slice(18)), full.isel(time=slice(18, None))
    memfs = fsspec.get_filesystem_class("memory")()

    for index, part in enumerate(halves):
        payload = part.to_netcdf()
        with fs_open(memfs, index) if False else memfs.open(f"cache/{index}.nc", "wb") as sink:
            sink.write(payload)

    sources = [f"memory://{entry}" for entry in memfs.ls("cache")]
    target = "memory://target.zarr"
    dims = {"concat_dim": "time", "append_dim": "time"}

    # Inside a flow context the call only builds a Task.
    with Flow("test") as flow:
        task = pangeo_forge.tasks.xarray.combine_and_write(sources, target, **dims)
        assert isinstance(task, Task)
    flow.validate()

    # Running the task directly performs the write and returns the target.
    written = pangeo_forge.tasks.xarray.combine_and_write.run(sources, target, **dims)
    assert written == target

    roundtrip = xr.open_zarr(memfs.get_mapper("target.zarr"))
    xr.testing.assert_equal(full, roundtrip)
Ejemplo n.º 5
0
def test_get_asset_works_with_custom_filesystem(catalog_with_assets):
    """``get_asset`` returns a DataArray when given an explicit filesystem."""
    local_fs = fsspec.get_filesystem_class("file")()
    loaded = get_asset(
        catalog_with_assets,
        asset_key="tile",
        item_id="tile_1",
        filesystem=local_fs,
    )
    assert isinstance(loaded, xr.DataArray)
Ejemplo n.º 6
0
 def _get_fs_and_protocol(self):
     """Build the filesystem for ``self.prefix_path``.

     Options derived from the URL are combined with ``self.storage_options``
     through ``update_storage_options`` before the filesystem class is
     instantiated.

     Returns a ``(filesystem, protocol)`` tuple.
     """
     user_options = self.storage_options or {}
     protocol, _ = split_protocol(self.prefix_path)
     fs_cls = fsspec.get_filesystem_class(protocol)
     url_options = fs_cls._get_kwargs_from_urls(self.prefix_path)
     update_storage_options(url_options, user_options)
     return fs_cls(**url_options), protocol
Ejemplo n.º 7
0
def test_makedirs_exist_ok(ssh):
    """``makedirs`` on an existing path raises unless ``exist_ok=True``."""
    sftp = fsspec.get_filesystem_class("sftp")(**ssh)
    nested = "/a/b/c"

    sftp.makedirs(nested)

    # A second creation must fail when exist_ok is off ...
    with pytest.raises(FileExistsError, match="/a/b/c"):
        sftp.makedirs(nested, exist_ok=False)

    # ... and pass when it is on.
    sftp.makedirs(nested, exist_ok=True)
Ejemplo n.º 8
0
def can_be_local(path):
    """Can the given URL be used with open_local?

    Returns the ``local_file`` attribute of the filesystem class registered
    for the URL's protocol, or ``False`` when the class has no such
    attribute, is not in the registry, or fails to import.
    """
    from fsspec import get_filesystem_class

    try:
        protocol = get_protocol(path)
        fs_cls = get_filesystem_class(protocol)
        return getattr(fs_cls, "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
Ejemplo n.º 9
0
def test_register():
    """V3ioFS is registered under its protocol and built by ``fsspec.filesystem``."""
    registered = fsspec.get_filesystem_class(V3ioFS.protocol)
    assert registered is V3ioFS, 'not registered'

    options = dict(v3io_api='a.b.com', v3io_access_key='s3cr3t')
    instance = fsspec.filesystem('v3io', **options)
    assert isinstance(instance, V3ioFS), f'bad object class - {instance.__class__}'
Ejemplo n.º 10
0
    def set_filesystem(self, filesystem=None):
        """
        Attach a filesystem to the driver.

        :param filesystem: (optional, `fsspec` compatible FileSystem instance)
            file system associated to the driver; when omitted (``None``), a
            fresh instance of the fsspec "file" filesystem is used instead
        """
        self.filesystem = filesystem
        if self.filesystem is not None:
            return
        # No filesystem supplied: fall back to the "file" implementation.
        default_cls = fsspec.get_filesystem_class("file")
        self.filesystem = default_cls()
Ejemplo n.º 11
0
def test_simple(ssh):
    """Exercise mkdirs/touch/find/ls/info/exists/rm on the SFTP filesystem."""
    home = '/home/someuser'
    created = '/home/someuser/deeper/afile'

    sftp = fsspec.get_filesystem_class('sftp')(**ssh)
    sftp.mkdirs('/home/someuser/deeper')
    sftp.touch(created)

    assert sftp.find(home) == [created]
    assert sftp.ls('/home/someuser/deeper/') == [created]
    assert sftp.info(created)['type'] == 'file'
    assert sftp.info(created)['size'] == 0
    assert sftp.exists(home)

    # Removing the tree recursively must leave nothing behind.
    sftp.rm(home, recursive=True)
    assert not sftp.exists(home)
Ejemplo n.º 12
0
def test_simple(ssh):
    """Exercise mkdirs/touch/find/ls/info/exists/rm on the SFTP filesystem."""
    home = "/home/someuser"
    created = "/home/someuser/deeper/afile"

    sftp = fsspec.get_filesystem_class("sftp")(**ssh)
    sftp.mkdirs("/home/someuser/deeper")
    sftp.touch(created)

    assert sftp.find(home) == [created]
    assert sftp.ls("/home/someuser/deeper/") == [created]
    assert sftp.info(created)["type"] == "file"
    assert sftp.info(created)["size"] == 0
    assert sftp.exists(home)

    # Removing the tree recursively must leave nothing behind.
    sftp.rm(home, recursive=True)
    assert not sftp.exists(home)
Ejemplo n.º 13
0
    def join(self, path, *paths):
        """Join ``paths`` onto ``path`` using the target filesystem's separator.

        Only the segment after the last chain separator is joined; everything
        before it, plus the protocol of that last segment (if any), is kept
        as a prefix of the result.
        """
        self._validate_path(path)

        head, chain_sep, tail = path.rpartition(self.CHAIN_SEPARATOR)
        prefix = head + chain_sep
        protocol, bare_path = fsspec.core.split_protocol(tail)
        fs_cls = fsspec.get_filesystem_class(protocol)
        if protocol:
            prefix += protocol + self.SEPARATOR
        joined = self._join(fs_cls.sep, (bare_path, *paths))
        return prefix + joined
Ejemplo n.º 14
0
def get_dir(path):
    """Return the parent directory of ``path``.

    For URLs (anything containing ``://``) the parent is computed by the
    filesystem class of the URL's protocol and is always returned with the
    protocol prefix. For plain paths the result is the absolute posix
    directory, relative to the current working directory, ending in ``/``.
    """
    if '://' not in path:
        cwd = os.getcwd()
        local_dir = make_path_posix(os.path.join(cwd, os.path.dirname(path)))
        if local_dir[-1] != '/':
            local_dir += '/'
        return local_dir

    protocol, _ = split_protocol(path)
    parent = get_filesystem_class(protocol)._parent(path)
    # some FSs strip the protocol, some do not
    if "://" in parent:
        return parent
    return protocol + "://" + parent
def wsi_file_urlpath(wsi_file):
    """Copy ``wsi_file`` into the in-memory filesystem and yield its URL.

    Files larger than 100 MiB are skipped. The in-memory copy is removed
    once the consumer is done with the yielded ``memory://`` URL.
    """
    if wsi_file.stat().st_size > 100 * 1024 * 1024:
        pytest.skip("reduce ram usage of tests")

    urlpath = f"memory://{wsi_file.name}"
    memfs = fsspec.get_filesystem_class("memory")()
    with fsspec.open(urlpath, mode="wb") as sink:
        sink.write(wsi_file.read_bytes())

    try:
        yield urlpath
    finally:
        memfs.rm(wsi_file.name)
Ejemplo n.º 16
0
def test_simple(smb_params):
    """Exercise mkdirs/touch/find/ls/info/exists/rm on the SMB filesystem."""
    top_dir = "/home/adir"
    nested_dir = "/home/adir/otherdir/"
    created = "/home/adir/otherdir/afile"

    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.mkdirs(nested_dir)
    smb.touch(created)

    assert smb.find(top_dir) == [created]
    assert smb.ls(nested_dir, detail=False) == [created]
    assert smb.info(created)["type"] == "file"
    assert smb.info(created)["size"] == 0
    assert smb.exists(top_dir)

    # Removing the tree recursively must leave nothing behind.
    smb.rm(top_dir, recursive=True)
    assert not smb.exists(top_dir)
Ejemplo n.º 17
0
def _get_fsspec_filesystem(filename):
    """
    Return the fsspec filesystem wrapper if fsspec knows the file's protocol.

    The protocol is the text before the first ``FSSpecFileSystem.SEPARATOR``
    in the first chain segment of ``filename``. Returns ``None`` when fsspec
    support is disabled or the class lookup yields a falsy value.
    """
    if not FSSPEC_ENABLED:
        return None

    first_segment, _, _ = filename.partition(FSSpecFileSystem.CHAIN_SEPARATOR)
    protocol, _, _ = first_segment.partition(FSSpecFileSystem.SEPARATOR)
    return _FSSPEC_FILESYSTEM if fsspec.get_filesystem_class(protocol) else None
Ejemplo n.º 18
0
def nc2zarr(source_url: str, cache_location: str) -> str:
    """Convert a netCDF dataset to Zarr and return the Zarr store's URL.

    The dataset is opened from ``source_url``, piped through ``preproc`` and
    ``postproc``, loaded, rechunked with ``chunks`` and written to
    ``source_url + ".zarr"`` on the same filesystem. ``cache_location`` is
    accepted but not used by this function.
    """
    protocol = source_url.split(':')[0]
    fs = fsspec.get_filesystem_class(protocol)(token='cloud')
    target_url = source_url + ".zarr"

    with dask.config.set(scheduler="single-threaded"):
        raw = xr.open_dataset(fs.open(source_url))
        processed = raw.pipe(preproc).pipe(postproc)
        ds = processed.load().chunk(chunks)

        store = fs.get_mapper(target_url)
        ds.to_zarr(store, mode='w')

    return target_url
Ejemplo n.º 19
0
def test_transaction(ssh):
    """Files touched inside a transaction only become visible once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` pair
    and the ``with f.transaction`` context-manager form.
    """
    f = fsspec.get_filesystem_class('sftp')(**ssh)
    f.mkdirs('/home/someuser/deeper')
    f.start_transaction()
    f.touch('/home/someuser/deeper/afile')
    # Not committed yet, so the file must not be listed.
    assert f.find('/home/someuser') == []
    f.end_transaction()
    # This comparison used to be a bare expression, so it never checked anything.
    assert f.find('/home/someuser') == ['/home/someuser/deeper/afile']

    with f.transaction:
        assert f._intrans
        f.touch('/home/someuser/deeper/afile2')
        # Only the previously committed file is visible inside the block.
        assert f.find('/home/someuser') == ['/home/someuser/deeper/afile']
    assert f.find('/home/someuser') == [
        '/home/someuser/deeper/afile', '/home/someuser/deeper/afile2'
    ]
Ejemplo n.º 20
0
def get_projects(tempdir):
    """Download the shape file of every project in the offsets database.

    The project list is read from the published JSON database; each
    project's ``shape.json`` is then fetched over HTTP into
    ``<tempdir>/raw/projects/<id>/shape.json``. The slug and local path are
    printed for every project.
    """
    database_url = "https://carbonplan.blob.core.windows.net/carbonplan-forests/offsets/database/forest-offsets-database-v1.0.json"
    with fsspec.open(database_url) as handle:
        projects = json.load(handle)

    http_fs = fsspec.get_filesystem_class("http")()
    for entry in projects:
        pid = entry["id"]
        slug = f"projects/{pid}/shape.json"
        print(slug, "slug")
        local_path = os.path.join(tempdir, f"raw/{slug}")
        print(local_path, "path")
        remote_url = f"https://carbonplan.blob.core.windows.net/carbonplan-forests/offsets/database/{slug}"
        http_fs.download(remote_url, local_path)
Ejemplo n.º 21
0
def test_transaction(ssh):
    """Files touched inside a transaction only become visible once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` pair
    and the ``with f.transaction`` context-manager form.
    """
    f = fsspec.get_filesystem_class("sftp")(**ssh)
    f.mkdirs("/home/someuser/deeper")
    f.start_transaction()
    f.touch("/home/someuser/deeper/afile")
    # Not committed yet, so the file must not be listed.
    assert f.find("/home/someuser") == []
    f.end_transaction()
    # This comparison used to be a bare expression, so it never checked anything.
    assert f.find("/home/someuser") == ["/home/someuser/deeper/afile"]

    with f.transaction:
        assert f._intrans
        f.touch("/home/someuser/deeper/afile2")
        # Only the previously committed file is visible inside the block.
        assert f.find("/home/someuser") == ["/home/someuser/deeper/afile"]
    assert f.find("/home/someuser") == [
        "/home/someuser/deeper/afile",
        "/home/someuser/deeper/afile2",
    ]
Ejemplo n.º 22
0
def test_transaction(smb_params):
    """Files touched inside an SMB transaction appear only after it ends."""
    top_dir = "/home/afolder"
    nested_dir = "/home/afolder/otherdir"
    first = "/home/afolder/otherdir/afile"
    second = "/home/afolder/otherdir/afile2"

    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.mkdirs(nested_dir)

    # Explicit start/end form.
    smb.start_transaction()
    smb.touch(first)
    assert smb.find(top_dir) == []
    smb.end_transaction()
    assert smb.find(top_dir) == [first]

    # Context-manager form.
    with smb.transaction:
        assert smb._intrans
        smb.touch(second)
        assert smb.find(top_dir) == [first]
    assert smb.find(top_dir) == [first, second]
Ejemplo n.º 23
0
def test_chained_url(ftp_writable):
    """Upload a small zip archive over FTP and read a member back via fsspec URLs.

    NOTE(review): this snippet is damaged (see the note above ``urls``) and
    does not parse as written.
    """
    # The fixture supplies the FTP connection details as a 4-tuple.
    host, port, username, password = ftp_writable
    data = {"afile": b"hello"}
    cls = fsspec.get_filesystem_class("ftp")
    fs = cls(host=host, port=port, username=username, password=password)
    # ``tempzip`` is not defined in this block; presumably a module-level
    # helper that yields a temporary zip file path -- verify.
    with tempzip(data) as lfile:
        fs.put_file(lfile, "archive.zip")

    # NOTE(review): the list below is never closed and the loop that binds
    # ``url`` is missing; text appears to have been lost (presumably during
    # credential redaction). Restore from the original test before running.
    urls = [
        "zip://afile",
        "zip://*****:*****@{host}:{port}/archive.zip"
        with fsspec.open(url, "rb") as f:
            assert f.read() == data["afile"]
Ejemplo n.º 24
0
def extract_files_to_message(file, fs, subject, packing=None):
    """Create a message listing the contents of a packed file.

    When ``packing`` is ``None`` the message is built for the original file
    on ``fs``. Otherwise ``packing`` names the fsspec filesystem class used
    to open the file virtually on top of ``fs``, and the message lists
    every entry found inside it.
    """
    file, filename = _get_filename(file, fs)

    if packing is None:
        return create_message_with_json_fs(fs.to_json(), file, subject)

    pack_cls = get_filesystem_class(packing)
    target_protocol = _get_fs_protocol(fs)
    archive_fs = pack_cls(
        fo=filename,
        target_protocol=target_protocol,
        target_options=fs.storage_options,
    )
    members = archive_fs.find('/', detail=True)
    file_list = list(members.values())
    return create_message_with_json_fs(
        archive_fs.to_json(), file_list, subject, file.get('metadata'))
Ejemplo n.º 25
0
def test_list(server):
    """Every local ``*.py`` file shows up in the HTTP glob of the server."""
    http_fs = fsspec.get_filesystem_class('http')()
    remote_names = http_fs.glob(server + '/*.py')
    local_names = glob.glob('*.py')
    for local_name in local_names:
        assert any(local_name in remote for remote in remote_names)
Ejemplo n.º 26
0
def test_pickle():
    """A zip filesystem survives a pickle round trip and can still read data."""
    with tempzip(data) as archive_path:
        zip_cls = fsspec.get_filesystem_class("zip")
        original = zip_cls(fo=archive_path)
        # Round-trips an object this test created itself, not external input.
        restored = pickle.loads(pickle.dumps(original))
        assert restored.cat("b") == b"hello"
Ejemplo n.º 27
0
def test_mapping():
    """The mapper view of a zip filesystem lists and returns the archive members."""
    with tempzip(data) as archive_path:
        zip_fs = fsspec.get_filesystem_class("zip")(fo=archive_path)
        mapper = zip_fs.get_mapper("")
        assert list(mapper) == ["a", "b", "deeply/nested/path"]
        assert mapper["b"] == data["b"]
Ejemplo n.º 28
0
def test_empty():
    """``find`` on a zip filesystem built from ``tempzip()`` with no data is empty."""
    with tempzip() as archive_path:
        zip_cls = fsspec.get_filesystem_class("zip")
        zip_fs = zip_cls(fo=archive_path)
        assert zip_fs.find("") == []
Ejemplo n.º 29
0
def upload_tiles(kind, tempdir, upload_to):
    """Recursively upload ``<tempdir>/processed/<kind>/`` to ``<upload_to>/<kind>``.

    The text before the first ``:`` in ``upload_to`` selects the fsspec
    filesystem class used for the upload.
    """
    print(f"uploading {kind} to {upload_to}")
    protocol = upload_to.split(":")[0]
    remote_fs = fsspec.get_filesystem_class(protocol)()
    local_dir = f"{tempdir}/processed/{kind}/"
    remote_dir = f"{upload_to}/{kind}"
    remote_fs.put(local_dir, remote_dir, recursive=True)
Ejemplo n.º 30
0
def get_fires(tempdir):
    """Download the fires dataset over HTTP to ``<tempdir>/raw/fires.geojson``."""
    source_url = "https://storage.googleapis.com/carbonplan-research/offset-fires/fires.json"
    local_path = os.path.join(tempdir, "raw/fires.geojson")
    http_fs = fsspec.get_filesystem_class("http")()
    http_fs.download(source_url, local_path)