def __init__(self, ds: str = "", cache: bool = False, cachedir: str = "", **kwargs):
    """Instantiate an Argovis Argo data loader.

    Parameters
    ----------
    ds: str
        Dataset to load ('phy'); defaults to OPTIONS['dataset'] when empty
    cache: bool
        Cache data or not (default: False)
    cachedir: str
        Path to the cache folder
    """
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
    self.definition = 'Argovis Argo data fetcher'
    self.dataset_id = ds if ds != '' else OPTIONS['dataset']
    self.server = api_server
    self.init(**kwargs)
    # Map Argovis JSON field names onto standard Argo variable names:
    self.key_map = {
        'date': 'TIME',
        'date_qc': 'TIME_QC',
        'lat': 'LATITUDE',
        'lon': 'LONGITUDE',
        'cycle_number': 'CYCLE_NUMBER',
        'DATA_MODE': 'DATA_MODE',
        'DIRECTION': 'DIRECTION',
        'platform_number': 'PLATFORM_NUMBER',
        'position_qc': 'POSITION_QC',
        'pres': 'PRES',
        'temp': 'TEMP',
        'psal': 'PSAL',
        'index': 'N_POINTS',
    }
def open_etopo1(box, res='l'):
    """ Download ETOPO1 topography for a box

    This function is in utilities to anticipate usage outside of plotting,
    eg interpolation, grounding detection.

    Parameters
    ----------
    box: list
        Domain to download as [xmin, xmax, ymin, ymax]
    res: str
        Grid resolution: 'l' for low (0.1 deg) or 'h' for high (0.016 deg)

    Returns
    -------
    xarray.DataArray
        Topography named 'topo', with the source dataset attributes attached
        (the original docstring advertised a Dataset, but a DataArray is returned)
    """
    resx, resy = 0.1, 0.1
    if res == 'h':
        resx, resy = 0.016, 0.016
    # The WCS bbox parameter is ordered xmin,ymin,xmax,ymax:
    bbox = ",".join([str(b) for b in [box[0], box[2], box[1], box[3]]])
    thisurl = (
        "https://gis.ngdc.noaa.gov/mapviewer-support/wcs-proxy/wcs.groovy?filename=etopo1.nc"
        "&request=getcoverage&version=1.0.0&service=wcs&coverage=etopo1&CRS=EPSG:4326&format=netcdf"
        "&resx={}&resy={}"
        "&bbox={}").format(resx, resy, bbox)
    ds = httpstore(cache=True).open_dataset(thisurl)
    da = ds['Band1'].rename("topo")
    # Propagate the source dataset attributes onto the extracted variable:
    da.attrs.update(ds.attrs)
    da.attrs['Data source'] = 'https://maps.ngdc.noaa.gov/viewers/wcs-client/'
    da.attrs['URI'] = thisurl
    return da
def __init__(self, cache: bool = False, cachedir: str = "", **kwargs):
    """Instantiate an ERDDAP Argo index loader with force caching.

    Parameters
    ----------
    cache: bool
        Cache data or not (default: False)
    cachedir: str
        Path to the cache folder
    """
    self.definition = 'Ifremer erddap Argo index fetcher'
    self.dataset_id = 'index'
    self.server = api_server
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
    self.init(**kwargs)
    self._init_erddapy()
def erddap_ds_exists(ds="ArgoFloats"):
    """ Check if a Dataset exists on the Ifremer erddap server

    Parameters
    ----------
    ds: str
        Name of the erddap dataset to look for (default: 'ArgoFloats')

    Returns
    -------
    bool
        True if the dataset identifier appears in the server index
    """
    with httpstore(timeout=120).open(
            "http://www.ifremer.fr/erddap/info/index.json") as of:
        erddap_index = json.load(of)
    # The dataset identifier is the last column of each row of the index table:
    return ds in [row[-1] for row in erddap_index['table']['rows']]
def test_clear_cache(self):
    uri = "https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt"
    with tempfile.TemporaryDirectory() as cachedir:
        fs = httpstore(cache=True, cachedir=cachedir)
        fs.read_csv(uri, skiprows=8, header=0)
        # The URI must be cached on disk after the read:
        cached_file = fs.cachepath(uri)
        assert isinstance(cached_file, str)
        assert os.path.isfile(cached_file)
        # After clearing, asking for the cache path must fail:
        fs.clear_cache()
        with pytest.raises(CacheFileNotFound):
            fs.cachepath(uri)
def test_open_mfjson(self):
    fs = httpstore()
    uri = [
        "https://argovis.colorado.edu/catalog/mprofiles/?ids=['6902746_%i']" % i
        for i in [12, 13]
    ]
    # Exercise every fetching method with and without a progress bar:
    for method in ["seq", "thread"]:
        for progress in [True, False]:
            results = fs.open_mfjson(uri, method=method, progress=progress)
            for item in results:
                assert is_list_of_dicts(item)
def test_cachefile(self):
    uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt'
    testcachedir = os.path.expanduser(os.path.join("~", ".argopytest_tmp"))
    try:
        fs = httpstore(cache=True, cachedir=testcachedir)
        fs.open_dataframe(uri, skiprows=8, header=0)
        assert isinstance(fs.cachepath(uri), str)
    finally:
        # Remove the temporary cache folder on success AND on failure;
        # finally re-raises any exception automatically, so the original
        # duplicated except/rmtree/raise branch is no longer needed.
        shutil.rmtree(testcachedir)
def test_open_mfdataset(self):
    fs = httpstore()
    uri = [
        "https://github.com/euroargodev/argopy-data/raw/master/ftp/dac/csiro/5900865/profiles/D5900865_00%i.nc" % i
        for i in [1, 2]
    ]
    for method in ["seq", "thread"]:
        for progress in [True, False]:
            # Default behaviour concatenates into a single Dataset:
            merged = fs.open_mfdataset(uri, method=method, progress=progress)
            assert isinstance(merged, xr.Dataset)
            # With concat=False, a list of individual Datasets is returned:
            separate = fs.open_mfdataset(uri, method=method, progress=progress, concat=False)
            assert is_list_of_datasets(separate)
def __init__(self, ds: str = "", cache: bool = False, cachedir: str = "", **kwargs):
    """Instantiate an ERDDAP Argo data loader.

    Parameters
    ----------
    ds: str
        Dataset to load: 'phy' or 'ref' or 'bgc'; defaults to OPTIONS['dataset'] when empty
    cache: bool
        Cache data or not (default: False)
    cachedir: str
        Path to the cache folder
    """
    self.definition = 'Ifremer erddap Argo data fetcher'
    self.dataset_id = ds if ds != '' else OPTIONS['dataset']
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
    self.init(**kwargs)
    self._init_erddapy()
def test_cache(self):
    store = httpstore(cache=1)
    # Caching must wrap the HTTP filesystem in a whole-file cache:
    expected_cls = fsspec.implementations.cached.WholeFileCacheFileSystem
    assert isinstance(store.fs, expected_cls)
def test_open_dataset(self):
    fs = httpstore()
    uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/dac/csiro/5900865/5900865_prof.nc'
    ds = fs.open_dataset(uri)
    assert isinstance(ds, xr.Dataset)
def test_open_dataframe(self):
    fs = httpstore()
    uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt'
    df = fs.open_dataframe(uri, skiprows=8, header=0)
    assert isinstance(df, pd.core.frame.DataFrame)
def test_creation(self):
    store = httpstore(cache=0)
    # Without caching, the underlying filesystem is a plain HTTP filesystem:
    assert isinstance(store.fs, fsspec.implementations.http.HTTPFileSystem)
def __init__(
    self,
    ds: str = "",
    cache: bool = False,
    cachedir: str = "",
    parallel: bool = False,
    parallel_method: str = "thread",
    progress: bool = False,
    chunks: str = "auto",
    chunks_maxsize: dict = None,
    api_timeout: int = 0,
    **kwargs,
):
    """ Instantiate an ERDDAP Argo data fetcher

    Parameters
    ----------
    ds: str (optional)
        Dataset to load: 'phy' or 'ref' or 'bgc'
    cache: bool (optional)
        Cache data or not (default: False)
    cachedir: str (optional)
        Path to cache folder
    parallel: bool (optional)
        Chunk request to use parallel fetching (default: False)
    parallel_method: str (optional)
        Define the parallelization method: ``thread``, ``process`` or a
        :class:`dask.distributed.client.Client`.
    progress: bool (optional)
        Show a progress bar or not when ``parallel`` is set to True.
    chunks: 'auto' or dict of integers (optional)
        Dictionary with request access point as keys and number of chunks to
        create as values. Eg: {'wmo': 10} will create a maximum of 10 chunks
        along WMOs when used with ``Fetch_wmo``.
    chunks_maxsize: dict (optional)
        Dictionary with request access point as keys and chunk size as values
        (used as maximum values in 'auto' chunking). Eg: {'wmo': 5} will create
        chunks with as many as 5 WMOs each. Defaults to an empty dict.
    api_timeout: int (optional)
        Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default.
    """
    # Mutable-default-argument fix: never share one dict across instances.
    self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize
    timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=timeout, size_policy='head')
    self.definition = "Ifremer erddap Argo data fetcher"
    self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
    self.server = api_server
    # A non-boolean ``parallel`` value is taken as the parallelization method itself:
    if not isinstance(parallel, bool):
        parallel_method = parallel
        parallel = True
    if parallel_method not in ["thread"]:
        raise ValueError(
            "erddap only support multi-threading, use 'thread' instead of '%s'"
            % parallel_method)
    self.parallel = parallel
    self.parallel_method = parallel_method
    self.progress = progress
    self.chunks = chunks
    self.init(**kwargs)
    self._init_erddapy()
def __init__(self,
             ds: str = "",
             cache: bool = False,
             cachedir: str = "",
             parallel: bool = False,
             parallel_method: str = "thread",
             progress: bool = False,
             chunks: str = "auto",
             chunks_maxsize: dict = None,
             api_timeout: int = 0,
             **kwargs):
    """ Instantiate an Argovis Argo data loader

    Parameters
    ----------
    ds: str (optional)
        Dataset to load: 'phy' or 'bgc'
    cache: bool (optional)
        Cache data or not (default: False)
    cachedir: str (optional)
        Path to cache folder
    parallel: bool (optional)
        Chunk request to use parallel fetching (default: False)
    parallel_method: str (optional)
        Define the parallelization method: ``thread``, ``process`` or a
        :class:`dask.distributed.client.Client`.
    progress: bool (optional)
        Show a progress bar or not when ``parallel`` is set to True.
    chunks: 'auto' or dict of integers (optional)
        Dictionary with request access point as keys and number of chunks to
        create as values. Eg: {'wmo': 10} will create a maximum of 10 chunks
        along WMOs when used with ``Fetch_wmo``.
    chunks_maxsize: dict (optional)
        Dictionary with request access point as keys and chunk size as values
        (used as maximum values in 'auto' chunking). Eg: {'wmo': 5} will create
        chunks with as many as 5 WMOs each. Defaults to an empty dict.
    api_timeout: int (optional)
        Argovis API request time out in seconds. Set to OPTIONS['api_timeout'] by default.
    """
    # Mutable-default-argument fix: never share one dict across instances.
    self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize
    timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=timeout)
    self.definition = "Argovis Argo data fetcher"
    self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
    self.server = api_server
    # A non-boolean ``parallel`` value is taken as the parallelization method itself:
    if not isinstance(parallel, bool):
        parallel_method = parallel
        parallel = True
    if parallel_method not in ["thread"]:
        raise ValueError(
            "argovis only support multi-threading, use 'thread' instead of '%s'"
            % parallel_method)
    self.parallel = parallel
    self.parallel_method = parallel_method
    self.progress = progress
    self.chunks = chunks
    self.init(**kwargs)
    # Map Argovis JSON field names onto standard Argo variable names:
    self.key_map = {
        "date": "TIME",
        "date_qc": "TIME_QC",
        "lat": "LATITUDE",
        "lon": "LONGITUDE",
        "cycle_number": "CYCLE_NUMBER",
        "DATA_MODE": "DATA_MODE",
        "DIRECTION": "DIRECTION",
        "platform_number": "PLATFORM_NUMBER",
        "position_qc": "POSITION_QC",
        "pres": "PRES",
        "temp": "TEMP",
        "psal": "PSAL",
        "index": "N_POINTS",
    }
def __init__(
    self,
    ds: str = "",
    cache: bool = False,
    cachedir: str = "",
    parallel: bool = False,
    parallel_method: str = "thread",
    progress: bool = False,
    chunks: str = "auto",
    chunks_maxsize: dict = None,
    api_timeout: int = 0,
    **kwargs,
):
    """ Instantiate an ERDDAP Argo data fetcher

    Parameters
    ----------
    ds: str (optional)
        Dataset to load: 'phy' or 'ref' or 'bgc'
    cache: bool (optional)
        Cache data or not (default: False)
    cachedir: str (optional)
        Path to cache folder
    parallel: bool (optional)
        Chunk request to use parallel fetching (default: False)
    parallel_method: str (optional)
        Define the parallelization method: ``thread``, ``process`` or a
        :class:`dask.distributed.client.Client`.
    progress: bool (optional)
        Show a progress bar or not when ``parallel`` is set to True.
    chunks: 'auto' or dict of integers (optional)
        Dictionary with request access point as keys and number of chunks to
        create as values. Eg: {'wmo': 10} will create a maximum of 10 chunks
        along WMOs when used with ``Fetch_wmo``.
    chunks_maxsize: dict (optional)
        Dictionary with request access point as keys and chunk size as values
        (used as maximum values in 'auto' chunking). Eg: {'wmo': 5} will create
        chunks with as many as 5 WMOs each. Defaults to an empty dict.
    api_timeout: int (optional)
        Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default.
    """
    # Mutable-default-argument fix: never share one dict across instances.
    self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize
    # Temporary fix for issue discussed here:
    # https://github.com/euroargodev/argopy/issues/63#issuecomment-742379699
    # A plain string comparison avoids the original int() parsing, which
    # crashed on non-integer version components (e.g. '0.8.4.dev0').
    if cache and fsspec.__version__.split("+")[0] == "0.8.4":
        cache = False
        warnings.warn(
            "Cache is impossible with fsspec version 0.8.4, please upgrade or downgrade to use cache.\n Moving to non cached file system"
        )
    timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
    self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=timeout, size_policy='head')
    self.definition = "Ifremer erddap Argo data fetcher"
    self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
    self.server = api_server
    # A non-boolean ``parallel`` value is taken as the parallelization method itself:
    if not isinstance(parallel, bool):
        parallel_method = parallel
        parallel = True
    if parallel_method not in ["thread"]:
        raise ValueError(
            "erddap only support multi-threading, use 'thread' instead of '%s'"
            % parallel_method)
    self.parallel = parallel
    self.parallel_method = parallel_method
    self.progress = progress
    self.chunks = chunks
    self.init(**kwargs)
    self._init_erddapy()
def test_nocachefile(self):
    store = httpstore(cache=1)
    # A cached store raises when asked for the path of a never-fetched URI:
    with pytest.raises(CacheFileNotFound):
        store.cachepath("dummy_uri")
def test_open_json(self):
    fs = httpstore()
    uri = "https://argovis.colorado.edu/catalog/mprofiles/?ids=['6902746_12']"
    payload = fs.open_json(uri)
    assert is_list_of_dicts(payload)
def test_nocache(self):
    store = httpstore(cache=0)
    # An uncached store has no cache path to report:
    with pytest.raises(FileSystemHasNoCache):
        store.cachepath("dummy_uri")