def _open_dataset(self):
    """Load ``self.urlpath`` into ``self._ds`` via xarray.

    A glob pattern or an explicit list of paths is opened with
    ``xr.open_mfdataset``; a single path is first localized with fsspec
    and opened with ``xr.open_dataset``.
    """
    import xarray as xr

    url = self.urlpath
    kwargs = self.xarray_kwargs

    if not (isinstance(url, list) or "*" in url):
        # Single file: ensure it is available on the local filesystem.
        url = fsspec.open_local(url, **self.storage_options)
        self._ds = xr.open_dataset(url, chunks=self.chunks, **kwargs)
        return

    # Multi-file case: wire in per-file preprocessing and the catalog's
    # combine/concat settings, refusing to silently override kwargs the
    # user already supplied.
    if self.pattern:
        kwargs.update(preprocess=self._add_path_to_ds)
    if self.combine is not None:
        if 'combine' in kwargs:
            raise Exception(
                "Setting 'combine' argument twice in the catalog is invalid"
            )
        kwargs.update(combine=self.combine)
    if self.concat_dim is not None:
        if 'concat_dim' in kwargs:
            raise Exception(
                "Setting 'concat_dim' argument twice in the catalog is invalid"
            )
        kwargs.update(concat_dim=self.concat_dim)
    self._ds = xr.open_mfdataset(url, chunks=self.chunks, **kwargs)
def test_multi_cache(protocol):
    """Glob of in-memory files is cached locally under ``cache_storage``.

    Exercises both ``fsspec.open_local`` (returns local paths) and
    ``fsspec.open_files`` (returns OpenFiles), then checks the cache
    still serves the data after the backing memory store is cleared.
    """
    with fsspec.open_files("memory://file*", "wb", num=2) as files:
        for f in files:
            f.write(b"hello")

    d2 = tempfile.mkdtemp()
    lurl = fsspec.open_local(
        f"{protocol}::memory://file*",
        mode="rb",
        **{protocol: {"cache_storage": d2, "same_names": True}},
    )
    assert all(d2 in u for u in lurl)
    assert all(os.path.basename(f) in ["file0", "file1"] for f in lurl)
    # Close handles deterministically; the original `open(u).read()`
    # left them for the garbage collector.
    for u in lurl:
        with open(u, "rb") as fh:
            assert fh.read() == b"hello"

    d2 = tempfile.mkdtemp()
    lurl = fsspec.open_files(
        f"{protocol}::memory://file*",
        mode="rb",
        **{protocol: {"cache_storage": d2, "same_names": True}},
    )
    with lurl as files:
        for f in files:
            assert os.path.basename(f.name) in ["file0", "file1"]
            assert f.read() == b"hello"

    # Clearing the remote store must not matter: reads hit the cache.
    fs = fsspec.filesystem("memory")
    fs.store.clear()
    with lurl as files:
        for f in files:
            assert os.path.basename(f.name) in ["file0", "file1"]
            assert f.read() == b"hello"
def _open_dataset(self):
    """Populate ``self._ds`` from ``self.urlpath`` using xarray."""
    import xarray as xr

    url = self.urlpath
    kwargs = self.xarray_kwargs

    if "*" in url or isinstance(url, list):
        # Multi-file dataset: forward the catalog's combine/concat
        # settings, rejecting duplicates with the user's own kwargs.
        if self.pattern:
            kwargs.update(preprocess=self._add_path_to_ds)
        if self.combine is not None:
            if 'combine' in kwargs:
                raise Exception(
                    "Setting 'combine' argument twice in the catalog is invalid"
                )
            kwargs.update(combine=self.combine)
        if self.concat_dim is not None:
            if 'concat_dim' in kwargs:
                raise Exception(
                    "Setting 'concat_dim' argument twice in the catalog is invalid"
                )
            kwargs.update(concat_dim=self.concat_dim)
        self._ds = xr.open_mfdataset(url, chunks=self.chunks, **kwargs)
        return

    # Single file: localize when possible, otherwise hand xarray an
    # open file-like object.
    # https://github.com/intake/filesystem_spec/issues/476#issuecomment-732372918
    if self._can_be_local:
        target = fsspec.open_local(self.urlpath, **self.storage_options)
    else:
        target = fsspec.open(self.urlpath, **self.storage_options).open()
    self._ds = xr.open_dataset(target, chunks=self.chunks, **kwargs)
def test_again(protocol):
    """Cached local copy is recreated after the cache directory is removed."""
    fn = "memory://afile"
    with fsspec.open(fn, "wb") as f:
        f.write(b"hello")

    d2 = tempfile.mkdtemp()
    lurl = fsspec.open_local(f"{protocol}::{fn}", **{protocol: {"cache_storage": d2}})
    assert os.path.exists(lurl)
    assert d2 in lurl
    # Close the handle deterministically instead of relying on GC
    # (the original `open(lurl).read()` leaked the file object).
    with open(lurl, "rb") as fh:
        assert fh.read() == b"hello"

    # Removing the cache dir invalidates the local copy...
    shutil.rmtree(d2)
    assert not os.path.exists(lurl)

    # ...and a fresh open_local re-populates it from the memory store.
    lurl = fsspec.open_local(f"{protocol}::{fn}", **{protocol: {"cache_storage": d2}})
    with open(lurl, "rb") as fh:
        assert fh.read() == b"hello"
def _open_dataset(self):
    """Open raster source(s); a list of paths is delegated to ``_open_files``."""
    import xarray as xr

    # Localize the source(s) first; open_local may return one path or a list.
    local_paths = fsspec.open_local(self.urlpath, **self.storage_options)
    if isinstance(local_paths, list):
        self._ds = self._open_files(local_paths)
        return
    self._ds = xr.open_rasterio(local_paths, chunks=self.chunks, **self._kwargs)
def _resolve_single_file(self, filelist):
    """Given a list of fsspec OpenFiles, find a .shp file.

    NOTE(review): the search actually runs over freshly localized copies
    of ``self.urlpath``; ``filelist`` only appears in the error message.
    """
    local_files = fsspec.open_local(self.urlpath, **self.storage_options)
    shp = next((path for path in local_files if path.endswith(".shp")), None)
    if shp is not None:
        return shp
    raise ValueError(
        f"No shapefile found in {filelist}, if you are using fsspec caching"
        " consider using same_names=True")
def _open_dataset(self, path: str) -> xarray.Dataset:
    """Open as an XArray Dataset, sometimes with local caching."""
    if not self.local_copy:
        # Stream directly from the (possibly remote) filesystem.
        with FileSystems().open(path) as file:
            yield xarray.open_dataset(file, **self.xarray_open_kwargs)
        return

    # simplecache downloads the file into a throwaway directory; the
    # yield keeps the directory alive while the consumer uses the
    # dataset (generator-as-context-manager pattern, presumably —
    # confirm against the decorator at the call site).
    with tempfile.TemporaryDirectory() as cache_dir:
        cached = fsspec.open_local(
            f"simplecache::{path}",
            simplecache={'cache_storage': cache_dir},
        )
        yield xarray.open_dataset(cached, **self.xarray_open_kwargs)
def _open_dataset(
    urlpath,
    varname,
    *,
    xarray_open_kwargs=None,
    preprocess=None,
    requested_variables=None,
    additional_attrs=None,
    expand_dims=None,
):
    """Open ``urlpath`` with xarray and post-process into a dataset.

    Parameters
    ----------
    urlpath : str or list
        Location(s) of the data; remote locations go through fsspec.
    varname : str or list
        Variable name(s) recorded under ``INTAKE_ESM_VARS_KEY``.
    xarray_open_kwargs : dict, optional
        Forwarded to ``xr.open_dataset`` / ``xr.open_mfdataset``;
        ``backend_kwargs.storage_options`` is used for fsspec access.
    preprocess : callable, optional
        Applied per-file by ``open_mfdataset`` and to the final dataset.
    requested_variables : str or list, optional
        Subset of ``varname`` to keep as data variables.
    additional_attrs : dict, optional
        Extra attributes merged onto the dataset by ``_update_attrs``.
    expand_dims : optional
        Dimensions to expand via ``_expand_dims``.
    """
    # Copy so we never mutate the caller's dict via .update() below, and
    # tolerate the declared default of None instead of crashing on
    # attribute access.
    xarray_open_kwargs = dict(xarray_open_kwargs or {})
    _can_be_local = fsspec.utils.can_be_local(urlpath)
    storage_options = xarray_open_kwargs.get('backend_kwargs', {}).get(
        'storage_options', {})
    # .get: a missing 'engine' key falls through to the local/remote
    # logic rather than raising KeyError.
    if xarray_open_kwargs.get('engine') == 'zarr':
        url = urlpath
    elif _can_be_local:
        url = fsspec.open_local(urlpath, **storage_options)
    else:
        url = fsspec.open(urlpath, **storage_options).open()

    # Handle multi-file datasets with `xr.open_mfdataset()`.  Only test
    # for a glob on strings/lists: `'*' in url` on an open file object
    # would iterate (and consume) the stream.
    if isinstance(url, list) or (isinstance(url, str) and '*' in url):
        # How should we handle concat_dim, and other xr.open_mfdataset kwargs?
        xarray_open_kwargs.update(preprocess=preprocess)
        xarray_open_kwargs.update(parallel=True)
        ds = xr.open_mfdataset(url, **xarray_open_kwargs)
    else:
        ds = xr.open_dataset(url, **xarray_open_kwargs)

    if preprocess is not None:
        ds = preprocess(ds)

    if varname and isinstance(varname, str):
        varname = [varname]

    if requested_variables:
        if isinstance(requested_variables, str):
            requested_variables = [requested_variables]
        variable_intersection = set(requested_variables).intersection(
            set(varname))
        variables = [
            variable
            for variable in variable_intersection
            if variable in ds.data_vars
        ]
        ds = ds[variables]
        ds.attrs[INTAKE_ESM_VARS_KEY] = variables
    else:
        ds.attrs[INTAKE_ESM_VARS_KEY] = varname

    ds = _expand_dims(expand_dims, ds)
    ds = _update_attrs(additional_attrs, ds)
    return ds
def _open_dataset(self):
    """Open raster data; remote URLs are handed to rasterio directly."""
    import xarray as xr

    if self._can_be_local:
        sources = fsspec.open_local(self.urlpath, **self.storage_options)
    else:
        # Remote: pass URLs through so the rasterio library does the
        # remote opening itself, rather than wrapping in a file object.
        sources = self.urlpath

    if isinstance(sources, list):
        self._ds = self._open_files(sources)
    else:
        self._ds = xr.open_rasterio(sources, chunks=self.chunks,
                                    **self._kwargs)
def _open_dataset(self):
    """Open ``self.urlpath``; globs/lists use ``open_mfdataset`` with
    nested-combine defaults."""
    import xarray as xr

    url = self.urlpath
    kwargs = self._kwargs

    if isinstance(url, list) or "*" in url:
        # Supply multi-file defaults only where the user has not
        # already provided a value.
        kwargs.setdefault('concat_dim', self.concat_dim)
        if self.pattern:
            kwargs.update(preprocess=self._add_path_to_ds)
        kwargs.setdefault('combine', 'nested')
        self._ds = xr.open_mfdataset(url, chunks=self.chunks, **kwargs)
    else:
        localized = fsspec.open_local(url, **self.storage_options)
        self._ds = xr.open_dataset(localized, chunks=self.chunks, **kwargs)
def _open_dataset(
    urlpath,
    varname,
    *,
    xarray_open_kwargs=None,
    preprocess=None,
    requested_variables=None,
    additional_attrs=None,
    expand_dims=None,
):
    """Open a single dataset with xarray and post-process it.

    Parameters
    ----------
    urlpath : str
        Location of the data; remote locations go through fsspec.
    varname : str or list
        Variable name(s) recorded under ``INTAKE_ESM_VARS_KEY``.
    xarray_open_kwargs : dict, optional
        Forwarded to ``xr.open_dataset``; ``backend_kwargs.storage_options``
        is used for fsspec access.
    preprocess : callable, optional
        Applied to the opened dataset.
    requested_variables : str or list, optional
        Subset of ``varname`` to keep as data variables.
    additional_attrs : dict, optional
        Extra attributes merged onto the dataset by ``_update_attrs``.
    expand_dims : optional
        Dimensions to expand via ``_expand_dims``.
    """
    # Tolerate the declared default of None instead of crashing on
    # subscript access, and use .get for 'backend_kwargs'/'engine' so a
    # missing key is treated as "not set" (consistent with the
    # multi-file variant of this helper).
    xarray_open_kwargs = xarray_open_kwargs or {}
    _can_be_local = fsspec.utils.can_be_local(urlpath)
    storage_options = xarray_open_kwargs.get('backend_kwargs', {}).get(
        'storage_options', {})
    if xarray_open_kwargs.get('engine') == 'zarr':
        url = urlpath
    elif _can_be_local:
        url = fsspec.open_local(urlpath, **storage_options)
    else:
        url = fsspec.open(urlpath, **storage_options).open()
    ds = xr.open_dataset(url, **xarray_open_kwargs)

    if preprocess is not None:
        ds = preprocess(ds)

    if varname and isinstance(varname, str):
        varname = [varname]

    if requested_variables:
        if isinstance(requested_variables, str):
            requested_variables = [requested_variables]
        variable_intersection = set(requested_variables).intersection(
            set(varname))
        variables = [
            variable
            for variable in variable_intersection
            if variable in ds.data_vars
        ]
        ds = ds[variables]
        ds.attrs[INTAKE_ESM_VARS_KEY] = variables
    else:
        ds.attrs[INTAKE_ESM_VARS_KEY] = varname

    ds = _expand_dims(expand_dims, ds)
    ds = _update_attrs(additional_attrs, ds)
    return ds
def download_h5(url):
    """Yield a read-only h5py File opened on a local copy of *url*.

    The file is closed automatically when the generator resumes.
    """
    path = fsspec.open_local(url)
    with h5py.File(path, "r") as handle:
        yield handle