Esempio n. 1
0
    def _open_dataset(self):
        """Open the source with xarray and store the result on ``self._ds``.

        A ``*`` in ``self.urlpath`` or a list of paths selects
        ``xr.open_mfdataset``; anything else uses ``xr.open_dataset``.
        The path(s) are resolved to local files with ``fsspec.open_local``
        before being handed to xarray.

        Raises
        ------
        Exception
            If ``combine`` or ``concat_dim`` is set on the source and is also
            present in ``self.xarray_kwargs``.
        """
        import xarray as xr
        url = self.urlpath

        # Work on a copy: updating ``self.xarray_kwargs`` in place would leave
        # 'combine'/'concat_dim'/'preprocess' behind, so a second call on the
        # same source would trip the "set twice" checks below.
        kwargs = dict(self.xarray_kwargs or {})

        if "*" in url or isinstance(url, list):
            _open_dataset = xr.open_mfdataset
            if self.pattern:
                kwargs.update(preprocess=self._add_path_to_ds)
            if self.combine is not None:
                if 'combine' in kwargs:
                    raise Exception(
                        "Setting 'combine' argument twice  in the catalog is invalid"
                    )
                kwargs.update(combine=self.combine)
            if self.concat_dim is not None:
                if 'concat_dim' in kwargs:
                    raise Exception(
                        "Setting 'concat_dim' argument twice  in the catalog is invalid"
                    )
                kwargs.update(concat_dim=self.concat_dim)
        else:
            _open_dataset = xr.open_dataset
        url = fsspec.open_local(url, **self.storage_options)

        self._ds = _open_dataset(url, chunks=self.chunks, **kwargs)
Esempio n. 2
0
def test_multi_cache(protocol):
    """Check multi-file access through the caching filesystem ``protocol``.

    Two files are written to the in-memory filesystem, then read back through
    ``fsspec.open_local`` and ``fsspec.open_files`` using a chained
    ``<protocol>::memory://`` URL with ``same_names=True``.
    """
    expected_names = ["file0", "file1"]
    chained_url = f"{protocol}::memory://file*"

    with fsspec.open_files("memory://file*", "wb", num=2) as files:
        for f in files:
            f.write(b"hello")

    # Local paths must live in the cache directory and keep the remote names.
    cache_dir = tempfile.mkdtemp()
    local_paths = fsspec.open_local(
        chained_url,
        mode="rb",
        **{protocol: {"cache_storage": cache_dir, "same_names": True}},
    )
    assert all(cache_dir in p for p in local_paths)
    assert all(os.path.basename(p) in expected_names for p in local_paths)
    assert all(open(p, "rb").read() == b"hello" for p in local_paths)

    def check_contents(open_files):
        # Enter the OpenFiles context and verify every file's name and data.
        with open_files as handles:
            for handle in handles:
                assert os.path.basename(handle.name) in expected_names
                assert handle.read() == b"hello"

    cache_dir = tempfile.mkdtemp()
    cached_files = fsspec.open_files(
        chained_url,
        mode="rb",
        **{protocol: {"cache_storage": cache_dir, "same_names": True}},
    )
    check_contents(cached_files)

    # Empty the in-memory store and read again; the data is presumably
    # served from the local cache at this point.
    fsspec.filesystem("memory").store.clear()
    check_contents(cached_files)
Esempio n. 3
0
    def _open_dataset(self):
        """Open the source with xarray and store the result on ``self._ds``.

        A ``*`` in ``self.urlpath`` or a list of paths selects
        ``xr.open_mfdataset``; anything else uses ``xr.open_dataset``.
        When ``self._can_be_local`` is truthy the path(s) are resolved with
        ``fsspec.open_local``; otherwise an open file object obtained from
        ``fsspec.open`` is passed to xarray.

        Raises
        ------
        Exception
            If ``combine`` or ``concat_dim`` is set on the source and is also
            present in ``self.xarray_kwargs``.
        """
        import xarray as xr
        url = self.urlpath

        # Work on a copy: updating ``self.xarray_kwargs`` in place would leave
        # 'combine'/'concat_dim'/'preprocess' behind, so a second call on the
        # same source would trip the "set twice" checks below.
        kwargs = dict(self.xarray_kwargs or {})

        if "*" in url or isinstance(url, list):
            _open_dataset = xr.open_mfdataset
            if self.pattern:
                kwargs.update(preprocess=self._add_path_to_ds)
            if self.combine is not None:
                if 'combine' in kwargs:
                    raise Exception(
                        "Setting 'combine' argument twice  in the catalog is invalid"
                    )
                kwargs.update(combine=self.combine)
            if self.concat_dim is not None:
                if 'concat_dim' in kwargs:
                    raise Exception(
                        "Setting 'concat_dim' argument twice  in the catalog is invalid"
                    )
                kwargs.update(concat_dim=self.concat_dim)
        else:
            _open_dataset = xr.open_dataset

        if self._can_be_local:
            url = fsspec.open_local(self.urlpath, **self.storage_options)
        else:
            # https://github.com/intake/filesystem_spec/issues/476#issuecomment-732372918
            url = fsspec.open(self.urlpath, **self.storage_options).open()

        self._ds = _open_dataset(url, chunks=self.chunks, **kwargs)
0
def test_again(protocol):
    """Check that a deleted cache directory is repopulated on the next open.

    A file is written to the in-memory filesystem and fetched through the
    caching ``protocol``; the cache directory is then removed and the same
    URL is opened a second time.
    """
    fn = "memory://afile"
    with fsspec.open(fn, "wb") as f:
        f.write(b"hello")

    cache_dir = tempfile.mkdtemp()
    chained_url = f"{protocol}::{fn}"

    def fetch():
        # Resolve the chained URL to a local path inside ``cache_dir``.
        return fsspec.open_local(
            chained_url, **{protocol: {"cache_storage": cache_dir}}
        )

    local_path = fetch()
    assert os.path.exists(local_path)
    assert cache_dir in local_path
    assert open(local_path, "rb").read() == b"hello"

    # Removing the cache directory removes the cached copy with it.
    shutil.rmtree(cache_dir)
    assert not os.path.exists(local_path)

    # A fresh open must produce a readable local file again.
    local_path = fetch()
    assert open(local_path, "rb").read() == b"hello"
Esempio n. 5
0
 def _open_dataset(self):
     """Resolve ``self.urlpath`` to local file(s) and load into ``self._ds``.

     A single path is opened with ``xr.open_rasterio``; a list of paths is
     delegated to ``self._open_files``.
     """
     import xarray as xr
     local = fsspec.open_local(self.urlpath, **self.storage_options)
     if not isinstance(local, list):
         self._ds = xr.open_rasterio(local, chunks=self.chunks, **self._kwargs)
         return
     self._ds = self._open_files(local)
Esempio n. 6
0
 def _resolve_single_file(self, filelist):
     """
     Return the local path of the ``.shp`` file behind ``self.urlpath``.

     The candidates come from ``fsspec.open_local(self.urlpath, ...)``;
     ``filelist`` is only used to build the error message.

     Raises ``ValueError`` when no candidate path ends with ``.shp``.
     """
     local_files = fsspec.open_local(self.urlpath, **self.storage_options)
     # open_local returns a bare string for a single, non-glob URL. Wrap it
     # so the loop inspects whole paths instead of individual characters.
     if isinstance(local_files, str):
         local_files = [local_files]
     for f in local_files:
         if f.endswith(".shp"):
             return f
     raise ValueError(
         f"No shapefile found in {filelist}, if you are using fsspec caching"
         " consider using same_names=True")
Esempio n. 7
0
 def _open_dataset(self, path: str) -> xarray.Dataset:
   """Yield an xarray Dataset opened from ``path``.

   Without ``self.local_copy`` the file is opened through ``FileSystems`` and
   handed to xarray directly. With it, the file is first fetched via fsspec's
   ``simplecache`` into a temporary directory, which stays alive only while
   the generator is suspended at the ``yield``.

   NOTE(review): this is a generator; presumably it is wrapped by a
   context-manager decorator that is not visible here -- confirm.
   """
   if not self.local_copy:
     with FileSystems().open(path) as remote_file:
       yield xarray.open_dataset(remote_file, **self.xarray_open_kwargs)
     return

   with tempfile.TemporaryDirectory() as cache_dir:
     cached_path = fsspec.open_local(
       f"simplecache::{path}",
       simplecache={'cache_storage': cache_dir},
     )
     yield xarray.open_dataset(cached_path, **self.xarray_open_kwargs)
Esempio n. 8
0
def _open_dataset(
    urlpath,
    varname,
    *,
    xarray_open_kwargs=None,
    preprocess=None,
    requested_variables=None,
    additional_attrs=None,
    expand_dims=None,
):
    """Open ``urlpath`` with xarray and return the post-processed dataset.

    Parameters
    ----------
    urlpath
        Location of the data. Passed unchanged to xarray for the ``zarr``
        engine; otherwise resolved through fsspec first.
    varname
        Variable name(s) of the asset; a single string is wrapped in a list.
    xarray_open_kwargs
        Keyword arguments for ``xr.open_dataset`` / ``xr.open_mfdataset``.
        ``backend_kwargs['storage_options']`` is forwarded to fsspec.
        The mapping passed in is not modified.
    preprocess
        Optional callable applied to the dataset (or handed to
        ``xr.open_mfdataset`` for multi-file inputs).
    requested_variables
        Optional name(s) used to subset the dataset to the variables that are
        requested, listed in ``varname`` and present in ``ds.data_vars``.
    additional_attrs, expand_dims
        Forwarded to ``_update_attrs`` and ``_expand_dims`` respectively.

    Returns
    -------
    The dataset, with ``INTAKE_ESM_VARS_KEY`` set in its attributes.
    """
    # Copy so the preprocess/parallel additions below do not leak into the
    # caller's dict; this also tolerates the declared default of None.
    xarray_open_kwargs = dict(xarray_open_kwargs or {})

    _can_be_local = fsspec.utils.can_be_local(urlpath)
    backend_kwargs = xarray_open_kwargs.get('backend_kwargs') or {}
    storage_options = backend_kwargs.get('storage_options', {})
    if xarray_open_kwargs.get('engine') == 'zarr':
        url = urlpath
    elif _can_be_local:
        url = fsspec.open_local(urlpath, **storage_options)
    else:
        url = fsspec.open(urlpath, **storage_options).open()

    # Handle multi-file datasets with `xr.open_mfdataset()`.
    # ``url`` may be an open file object here; only test for a glob on real
    # strings, because ``'*' in <file object>`` is not a path check.
    is_multi_file = isinstance(url, list) or (isinstance(url, str)
                                              and '*' in url)
    if is_multi_file:
        # How should we handle concat_dim, and other xr.open_mfdataset kwargs?
        xarray_open_kwargs.update(preprocess=preprocess)
        xarray_open_kwargs.update(parallel=True)
        ds = xr.open_mfdataset(url, **xarray_open_kwargs)
    else:
        ds = xr.open_dataset(url, **xarray_open_kwargs)
        if preprocess is not None:
            ds = preprocess(ds)

    if varname and isinstance(varname, str):
        varname = [varname]
    if requested_variables:
        if isinstance(requested_variables, str):
            requested_variables = [requested_variables]
        variable_intersection = set(requested_variables).intersection(
            set(varname))
        variables = [
            variable for variable in variable_intersection
            if variable in ds.data_vars
        ]
        ds = ds[variables]
        ds.attrs[INTAKE_ESM_VARS_KEY] = variables
    else:
        ds.attrs[INTAKE_ESM_VARS_KEY] = varname

    ds = _expand_dims(expand_dims, ds)
    ds = _update_attrs(additional_attrs, ds)
    return ds
Esempio n. 9
0
 def _open_dataset(self):
     """Load the raster(s) behind ``self.urlpath`` into ``self._ds``.

     When ``self._can_be_local`` is truthy the URL is resolved to local
     path(s) with ``fsspec.open_local``; otherwise the URL is used as is.
     A list is delegated to ``self._open_files``; a single target is opened
     with ``xr.open_rasterio``.
     """
     import xarray as xr
     # Remote URLs are passed through untouched so that the rasterio library
     # handles the remote opening itself.
     target = (
         fsspec.open_local(self.urlpath, **self.storage_options)
         if self._can_be_local
         else self.urlpath
     )
     if not isinstance(target, list):
         self._ds = xr.open_rasterio(target, chunks=self.chunks, **self._kwargs)
         return
     self._ds = self._open_files(target)
Esempio n. 10
0
    def _open_dataset(self):
        """Open ``self.urlpath`` with xarray and store it on ``self._ds``.

        A ``*`` in the URL or a list of URLs selects ``xr.open_mfdataset``,
        with ``concat_dim`` defaulting to ``self.concat_dim`` and ``combine``
        defaulting to ``'nested'`` when not already given; otherwise
        ``xr.open_dataset`` is used. Paths are resolved to local files with
        ``fsspec.open_local`` first.

        NOTE: the defaults are written into ``self._kwargs`` itself.
        """
        import xarray as xr
        target = self.urlpath
        options = self._kwargs
        if "*" in target or isinstance(target, list):
            if 'concat_dim' not in options:
                options['concat_dim'] = self.concat_dim
            if self.pattern:
                options['preprocess'] = self._add_path_to_ds
            if 'combine' not in options:
                options['combine'] = 'nested'
            opener = xr.open_mfdataset
        else:
            opener = xr.open_dataset

        local_target = fsspec.open_local(target, **self.storage_options)
        self._ds = opener(local_target, chunks=self.chunks, **options)
Esempio n. 11
0
def _open_dataset(
    urlpath,
    varname,
    *,
    xarray_open_kwargs=None,
    preprocess=None,
    requested_variables=None,
    additional_attrs=None,
    expand_dims=None,
):
    """Open ``urlpath`` with ``xr.open_dataset`` and post-process the result.

    Parameters
    ----------
    urlpath
        Location of the data. Passed unchanged to xarray for the ``zarr``
        engine; otherwise resolved through fsspec first.
    varname
        Variable name(s) of the asset; a single string is wrapped in a list.
    xarray_open_kwargs
        Keyword arguments for ``xr.open_dataset``.
        ``backend_kwargs['storage_options']`` is forwarded to fsspec.
    preprocess
        Optional callable applied to the opened dataset.
    requested_variables
        Optional name(s) used to subset the dataset to the variables that are
        requested, listed in ``varname`` and present in ``ds.data_vars``.
    additional_attrs, expand_dims
        Forwarded to ``_update_attrs`` and ``_expand_dims`` respectively.

    Returns
    -------
    The dataset, with ``INTAKE_ESM_VARS_KEY`` set in its attributes.
    """
    # The signature defaults to None, and 'backend_kwargs' / 'engine' may be
    # absent; fall back to empty values instead of failing on the lookup.
    xarray_open_kwargs = xarray_open_kwargs or {}

    _can_be_local = fsspec.utils.can_be_local(urlpath)
    backend_kwargs = xarray_open_kwargs.get('backend_kwargs') or {}
    storage_options = backend_kwargs.get('storage_options', {})
    if xarray_open_kwargs.get('engine') == 'zarr':
        url = urlpath
    elif _can_be_local:
        url = fsspec.open_local(urlpath, **storage_options)
    else:
        url = fsspec.open(urlpath, **storage_options).open()

    ds = xr.open_dataset(url, **xarray_open_kwargs)

    if preprocess is not None:
        ds = preprocess(ds)
    if varname and isinstance(varname, str):
        varname = [varname]
    if requested_variables:
        if isinstance(requested_variables, str):
            requested_variables = [requested_variables]
        variable_intersection = set(requested_variables).intersection(
            set(varname))
        variables = [
            variable for variable in variable_intersection
            if variable in ds.data_vars
        ]
        ds = ds[variables]
        ds.attrs[INTAKE_ESM_VARS_KEY] = variables
    else:
        ds.attrs[INTAKE_ESM_VARS_KEY] = varname

    ds = _expand_dims(expand_dims, ds)
    ds = _update_attrs(additional_attrs, ds)
    return ds
Esempio n. 12
0
def download_h5(url):
    """Yield a read-only ``h5py.File`` for the local copy of ``url``.

    The URL is resolved to a local path with ``fsspec.open_local``; the HDF5
    file is closed when the generator resumes after the ``yield``.
    """
    with h5py.File(fsspec.open_local(url), "r") as h5_file:
        yield h5_file