Example #1
0
    def __init__(self,
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 **kwargs):
        """ Instantiate an Argovis Argo data loader

            Parameters
            ----------
            ds: str
                Dataset to load; falls back on OPTIONS['dataset'] when empty
            cache: bool, default: False
                Cache fetched data or not
            cachedir: str
                Path to the cache folder
        """
        self.definition = 'Argovis Argo data fetcher'
        self.server = api_server
        self.dataset_id = ds if ds != '' else OPTIONS['dataset']
        self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
        self.init(**kwargs)
        # Map Argovis JSON field names onto standard Argo variable names
        self.key_map = dict(date='TIME',
                            date_qc='TIME_QC',
                            lat='LATITUDE',
                            lon='LONGITUDE',
                            cycle_number='CYCLE_NUMBER',
                            DATA_MODE='DATA_MODE',
                            DIRECTION='DIRECTION',
                            platform_number='PLATFORM_NUMBER',
                            position_qc='POSITION_QC',
                            pres='PRES',
                            temp='TEMP',
                            psal='PSAL',
                            index='N_POINTS')
Example #2
0
def open_etopo1(box, res='l'):
    """ Download an ETOPO1 topography subset for a box

        This function is in utilities to anticipate usage outside of
        plotting, e.g. interpolation, grounding detection.

        Parameters
        ----------
        box: list
            Domain to download: [xmin, xmax, ymin, ymax]
        res: str, default: 'l'
            Resolution: 'h' for high (0.016 deg), anything else for low (0.1 deg)

        Returns
        -------
        xarray.DataArray
            Topography ('topo') with data source and request URI stored in attributes
    """
    # Grid steps (degrees) for the WCS request
    resx, resy = (0.016, 0.016) if res == 'h' else (0.1, 0.1)

    # The NOAA WCS proxy expects the bbox as xmin,ymin,xmax,ymax
    bbox = ",".join(str(b) for b in [box[0], box[2], box[1], box[3]])
    thisurl = (
        "https://gis.ngdc.noaa.gov/mapviewer-support/wcs-proxy/wcs.groovy?filename=etopo1.nc"
        "&request=getcoverage&version=1.0.0&service=wcs&coverage=etopo1&CRS=EPSG:4326&format=netcdf"
        "&resx={}&resy={}"
        "&bbox={}").format(resx, resy, bbox)
    ds = httpstore(cache=True).open_dataset(thisurl)
    da = ds['Band1'].rename("topo")
    # Propagate dataset-level attributes onto the returned DataArray
    da.attrs.update(ds.attrs)
    da.attrs['Data source'] = 'https://maps.ngdc.noaa.gov/viewers/wcs-client/'
    da.attrs['URI'] = thisurl
    return da
Example #3
0
 def __init__(self, cache: bool = False, cachedir: str = "", **kwargs):
     """ Instantiate an Ifremer erddap Argo index loader

         Caching behaviour is delegated to the underlying http store.
     """
     self.definition = 'Ifremer erddap Argo index fetcher'
     self.dataset_id = 'index'
     self.server = api_server
     self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
     self.init(**kwargs)
     self._init_erddapy()
Example #4
0
def erddap_ds_exists(ds="ArgoFloats"):
    """ Check if a Dataset exists on the Ifremer erddap server

        Parameters
        ----------
        ds: str, default: "ArgoFloats"
            Name of the erddap dataset to look for

        Returns
        -------
        bool
    """
    with httpstore(timeout=120).open(
            "http://www.ifremer.fr/erddap/info/index.json") as of:
        erddap_index = json.load(of)
    # Dataset identifiers sit in the last column of the erddap index table
    return ds in [row[-1] for row in erddap_index['table']['rows']]
Example #5
0
 def test_clear_cache(self):
     # Clearing the cache must invalidate previously cached URIs
     uri = "https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt"
     with tempfile.TemporaryDirectory() as cachedir:
         fs = httpstore(cache=True, cachedir=cachedir)
         fs.read_csv(uri, skiprows=8, header=0)
         cached_path = fs.cachepath(uri)
         assert isinstance(cached_path, str)
         assert os.path.isfile(cached_path)
         fs.clear_cache()
         with pytest.raises(CacheFileNotFound):
             fs.cachepath(uri)
Example #6
0
 def test_open_mfjson(self):
     # Fetch two Argovis profiles with each download strategy, with and
     # without a progress bar
     store = httpstore()
     urls = [
         "https://argovis.colorado.edu/catalog/mprofiles/?ids=['6902746_%i']" % i
         for i in (12, 13)
     ]
     for method in ("seq", "thread"):
         for progress in (True, False):
             results = store.open_mfjson(urls, method=method, progress=progress)
             for item in results:
                 assert is_list_of_dicts(item)
Example #7
0
 def test_cachefile(self):
     """ A cached URI must expose a valid cache file path """
     uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt'
     testcachedir = os.path.expanduser(os.path.join("~", ".argopytest_tmp"))
     try:
         # ``cache`` is typed as bool, so pass an explicit boolean
         fs = httpstore(cache=True, cachedir=testcachedir)
         fs.open_dataframe(uri, skiprows=8, header=0)
         assert isinstance(fs.cachepath(uri), str)
     finally:
         # Single cleanup path instead of duplicated rmtree in try/except;
         # ignore_errors avoids masking a test failure when the cache folder
         # was never created
         shutil.rmtree(testcachedir, ignore_errors=True)
Example #8
0
 def test_open_mfdataset(self):
     # Two multi-file strategies, with and without a progress bar; check
     # both the concatenated and the non-concatenated results
     store = httpstore()
     urls = [
         "https://github.com/euroargodev/argopy-data/raw/master/ftp/dac/csiro/5900865/profiles/D5900865_00%i.nc" % i
         for i in (1, 2)
     ]
     for method in ("seq", "thread"):
         for progress in (True, False):
             concatenated = store.open_mfdataset(urls,
                                                 method=method,
                                                 progress=progress)
             assert isinstance(concatenated, xr.Dataset)
             separate = store.open_mfdataset(urls,
                                             method=method,
                                             progress=progress,
                                             concat=False)
             assert is_list_of_datasets(separate)
Example #9
0
    def __init__(self,
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 **kwargs):
        """ Instantiate an ERDDAP Argo data loader

            Parameters
            ----------
            ds: str
                Dataset to load: 'phy', 'ref' or 'bgc'; falls back on
                OPTIONS['dataset'] when empty
            cache: bool, default: False
                Cache fetched data or not
            cachedir: str
                Path to the cache folder
        """
        self.definition = 'Ifremer erddap Argo data fetcher'
        self.dataset_id = ds if ds != '' else OPTIONS['dataset']
        self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=120)
        self.init(**kwargs)
        self._init_erddapy()
Example #10
0
 def test_cache(self):
     """ A cache-enabled store must wrap a whole-file caching filesystem """
     # ``cache`` is typed as bool: pass an explicit boolean, not a truthy int
     fs = httpstore(cache=True)
     assert isinstance(
         fs.fs, fsspec.implementations.cached.WholeFileCacheFileSystem)
Example #11
0
 def test_open_dataset(self):
     # A remote netcdf profile file must open as an xarray Dataset
     store = httpstore()
     uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/dac/csiro/5900865/5900865_prof.nc'
     result = store.open_dataset(uri)
     assert isinstance(result, xr.Dataset)
Example #12
0
 def test_open_dataframe(self):
     """ A remote index file must open as a pandas DataFrame """
     uri = 'https://github.com/euroargodev/argopy-data/raw/master/ftp/ar_index_global_prof.txt'
     fs = httpstore()
     # Use the public alias pd.DataFrame instead of the private path
     # pd.core.frame.DataFrame (same class, stable API)
     assert isinstance(fs.open_dataframe(uri, skiprows=8, header=0),
                       pd.DataFrame)
Example #13
0
 def test_creation(self):
     """ A cache-less store must expose a plain HTTP filesystem """
     # ``cache`` is typed as bool: pass an explicit boolean, not 0
     fs = httpstore(cache=False)
     assert isinstance(fs.fs, fsspec.implementations.http.HTTPFileSystem)
Example #14
0
    def __init__(
        self,
        ds: str = "",
        cache: bool = False,
        cachedir: str = "",
        parallel: bool = False,
        parallel_method: str = "thread",
        progress: bool = False,
        chunks: str = "auto",
        chunks_maxsize: dict = None,
        api_timeout: int = 0,
        **kwargs,
    ):
        """ Instantiate an ERDDAP Argo data fetcher

        Parameters
        ----------
        ds: str (optional)
            Dataset to load: 'phy' or 'ref' or 'bgc'
        cache: bool (optional)
            Cache data or not (default: False)
        cachedir: str (optional)
            Path to cache folder
        parallel: bool (optional)
            Chunk request to use parallel fetching (default: False)
        parallel_method: str (optional)
            Define the parallelization method: ``thread``, ``process`` or a :class:`dask.distributed.client.Client`.
        progress: bool (optional)
            Show a progress bar or not when ``parallel`` is set to True.
        chunks: 'auto' or dict of integers (optional)
            Dictionary with request access point as keys and number of chunks to create as values.
            Eg: {'wmo': 10} will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
        chunks_maxsize: dict (optional)
            Dictionary with request access point as keys and chunk size as values (used as maximum values in
            'auto' chunking).
            Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each.
            Defaults to an empty dict.
        api_timeout: int (optional)
            Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default.
        """
        timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
        # 'head' size policy lets the store probe remote file sizes cheaply
        self.fs = httpstore(cache=cache,
                            cachedir=cachedir,
                            timeout=timeout,
                            size_policy='head')
        self.definition = "Ifremer erddap Argo data fetcher"
        self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
        self.server = api_server

        # ``parallel`` may carry the method name directly (eg: parallel='thread')
        if not isinstance(parallel, bool):
            parallel_method = parallel
            parallel = True
        if parallel_method not in ["thread"]:
            raise ValueError(
                "erddap only support multi-threading, use 'thread' instead of '%s'"
                % parallel_method)
        self.parallel = parallel
        self.parallel_method = parallel_method
        self.progress = progress
        self.chunks = chunks
        # Avoid the mutable-default-argument pitfall: the previous ``{}``
        # default was shared across all instances
        self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize

        self.init(**kwargs)
        self._init_erddapy()
Example #15
0
    def __init__(self,
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 parallel: bool = False,
                 parallel_method: str = "thread",
                 progress: bool = False,
                 chunks: str = "auto",
                 chunks_maxsize: dict = None,
                 api_timeout: int = 0,
                 **kwargs):
        """ Instantiate an Argovis Argo data loader

        Parameters
        ----------
        ds: str (optional)
            Dataset to load: 'phy' or 'bgc'
        cache: bool (optional)
            Cache data or not (default: False)
        cachedir: str (optional)
            Path to cache folder
        parallel: bool (optional)
            Chunk request to use parallel fetching (default: False)
        parallel_method: str (optional)
            Define the parallelization method: ``thread``, ``process`` or a :class:`dask.distributed.client.Client`.
        progress: bool (optional)
            Show a progress bar or not when ``parallel`` is set to True.
        chunks: 'auto' or dict of integers (optional)
            Dictionary with request access point as keys and number of chunks to create as values.
            Eg: {'wmo': 10} will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
        chunks_maxsize: dict (optional)
            Dictionary with request access point as keys and chunk size as values (used as maximum values in
            'auto' chunking).
            Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each.
            Defaults to an empty dict.
        api_timeout: int (optional)
            Argovis API request time out in seconds. Set to OPTIONS['api_timeout'] by default.
        """
        timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
        self.fs = httpstore(cache=cache, cachedir=cachedir, timeout=timeout)
        self.definition = "Argovis Argo data fetcher"
        self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
        self.server = api_server

        # ``parallel`` may carry the method name directly (eg: parallel='thread')
        if not isinstance(parallel, bool):
            parallel_method = parallel
            parallel = True
        if parallel_method not in ["thread"]:
            raise ValueError(
                "argovis only support multi-threading, use 'thread' instead of '%s'"
                % parallel_method)
        self.parallel = parallel
        self.parallel_method = parallel_method
        self.progress = progress
        self.chunks = chunks
        # Avoid the mutable-default-argument pitfall: the previous ``{}``
        # default was shared across all instances
        self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize

        self.init(**kwargs)
        # Map Argovis JSON field names onto standard Argo variable names
        self.key_map = {
            "date": "TIME",
            "date_qc": "TIME_QC",
            "lat": "LATITUDE",
            "lon": "LONGITUDE",
            "cycle_number": "CYCLE_NUMBER",
            "DATA_MODE": "DATA_MODE",
            "DIRECTION": "DIRECTION",
            "platform_number": "PLATFORM_NUMBER",
            "position_qc": "POSITION_QC",
            "pres": "PRES",
            "temp": "TEMP",
            "psal": "PSAL",
            "index": "N_POINTS",
        }
Example #16
0
    def __init__(
        self,
        ds: str = "",
        cache: bool = False,
        cachedir: str = "",
        parallel: bool = False,
        parallel_method: str = "thread",
        progress: bool = False,
        chunks: str = "auto",
        chunks_maxsize: dict = None,
        api_timeout: int = 0,
        **kwargs,
    ):
        """ Instantiate an ERDDAP Argo data fetcher

        Parameters
        ----------
        ds: str (optional)
            Dataset to load: 'phy' or 'ref' or 'bgc'
        cache: bool (optional)
            Cache data or not (default: False)
        cachedir: str (optional)
            Path to cache folder
        parallel: bool (optional)
            Chunk request to use parallel fetching (default: False)
        parallel_method: str (optional)
            Define the parallelization method: ``thread``, ``process`` or a :class:`dask.distributed.client.Client`.
        progress: bool (optional)
            Show a progress bar or not when ``parallel`` is set to True.
        chunks: 'auto' or dict of integers (optional)
            Dictionary with request access point as keys and number of chunks to create as values.
            Eg: {'wmo': 10} will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
        chunks_maxsize: dict (optional)
            Dictionary with request access point as keys and chunk size as values (used as maximum values in
            'auto' chunking).
            Eg: {'wmo': 5} will create chunks with as many as 5 WMOs each.
            Defaults to an empty dict.
        api_timeout: int (optional)
            Erddap request time out in seconds. Set to OPTIONS['api_timeout'] by default.
        """

        # Temporary fix for issue discussed here: https://github.com/euroargodev/argopy/issues/63#issuecomment-742379699
        # Compare version components as strings: parsing with int() would
        # crash on suffixed releases such as '0.8.4.post1'
        if cache and fsspec.__version__.split(".")[:3] == ["0", "8", "4"]:
            cache = False
            warnings.warn(
                "Cache is impossible with fsspec version 0.8.4, please upgrade or downgrade to use cache.\n Moving to non cached file system"
            )

        timeout = OPTIONS["api_timeout"] if api_timeout == 0 else api_timeout
        # 'head' size policy lets the store probe remote file sizes cheaply
        self.fs = httpstore(cache=cache,
                            cachedir=cachedir,
                            timeout=timeout,
                            size_policy='head')
        self.definition = "Ifremer erddap Argo data fetcher"
        self.dataset_id = OPTIONS["dataset"] if ds == "" else ds
        self.server = api_server

        # ``parallel`` may carry the method name directly (eg: parallel='thread')
        if not isinstance(parallel, bool):
            parallel_method = parallel
            parallel = True
        if parallel_method not in ["thread"]:
            raise ValueError(
                "erddap only support multi-threading, use 'thread' instead of '%s'"
                % parallel_method)
        self.parallel = parallel
        self.parallel_method = parallel_method
        self.progress = progress
        self.chunks = chunks
        # Avoid the mutable-default-argument pitfall: the previous ``{}``
        # default was shared across all instances
        self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize

        self.init(**kwargs)
        self._init_erddapy()
Example #17
0
 def test_nocachefile(self):
     """ Asking for the cache path of an unknown URI must raise CacheFileNotFound """
     # ``cache`` is typed as bool: pass an explicit boolean, not a truthy int
     fs = httpstore(cache=True)
     with pytest.raises(CacheFileNotFound):
         fs.cachepath("dummy_uri")
Example #18
0
 def test_open_json(self):
     # A single Argovis profile request must decode to a list of dicts
     store = httpstore()
     profile_uri = "https://argovis.colorado.edu/catalog/mprofiles/?ids=['6902746_12']"
     payload = store.open_json(profile_uri)
     assert is_list_of_dicts(payload)
Example #19
0
 def test_nocache(self):
     """ A cache-less store must raise FileSystemHasNoCache on cachepath """
     # ``cache`` is typed as bool: pass an explicit boolean, not 0
     fs = httpstore(cache=False)
     with pytest.raises(FileSystemHasNoCache):
         fs.cachepath("dummy_uri")