Example #1
    def __init__(self,
                 local_ftp: str = "",
                 index_file: str = "ar_index_global_prof.txt",
                 cache: bool = False,
                 cachedir: str = "",
                 **kwargs):
        """ Init fetcher

            Parameters
            ----------
            local_ftp : str
                Path to the directory with the 'dac' folder and index file
            index_file : str
                Name of the Argo index file (default: 'ar_index_global_prof.txt')
            cache : bool
                Cache the index or not (default: False)
            cachedir : str
                Path to the cache folder
        """
        self.cache = cache
        self.definition = 'Local ftp Argo index fetcher'
        self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
        check_localftp(self.local_ftp, errors='raise')  # Validate local_ftp
        self.index_file = index_file
        self.fs = indexstore(cache, cachedir, os.path.sep.join([self.local_ftp, self.index_file]))
        self.dataset_id = 'index'
        self.init(**kwargs)
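
For orientation, here is a minimal usage sketch of this initializer. The class name ``LocalFTPIndexFetcher`` is hypothetical (the snippet does not show the enclosing class), and the paths are placeholders:

# Hypothetical usage sketch; the class name is assumed for illustration only.
fetcher = LocalFTPIndexFetcher(
    local_ftp="/data/argo",                 # directory containing 'dac/' and the index file
    index_file="ar_index_global_prof.txt",  # default Argo profile index
    cache=True,
    cachedir="/tmp/argo-index-cache",
)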
Example #2
import pytest

from argopy.utilities import check_localftp  # location in the argopy releases this snippet targets
from argopy.errors import FtpPathError


def test_check_localftp():
    assert check_localftp("dummy_path", errors='ignore') is False
    with pytest.raises(FtpPathError):
        check_localftp("dummy_path", errors='raise')
    with pytest.warns(UserWarning):
        assert check_localftp("dummy_path", errors='warn') is False
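
The test above pins down the three-mode contract of the ``errors`` argument: 'ignore' returns False, 'raise' raises ``FtpPathError``, and 'warn' emits a ``UserWarning`` while still returning False. Below is a self-contained sketch of a validator honouring that contract, as an illustration of the pattern rather than argopy's actual implementation:

import os
import warnings

class FtpPathError(ValueError):
    """Stand-in for argopy.errors.FtpPathError."""

def check_localftp(path: str, errors: str = 'ignore') -> bool:
    """Return True if 'path' looks like a local Argo mirror (i.e. contains a 'dac' folder)."""
    if os.path.isdir(path) and os.path.isdir(os.path.join(path, 'dac')):
        return True
    msg = "This path is not a valid local Argo ftp mirror: %s" % path
    if errors == 'raise':
        raise FtpPathError(msg)
    if errors == 'warn':
        warnings.warn(msg)  # a plain UserWarning, matching pytest.warns(UserWarning)
    return False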
Example #3
    def __init__(self,
                 local_ftp: str = "",
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 dimension: str = 'point',
                 errors: str = 'raise',
                 parallel: bool = False,
                 parallel_method: str = 'thread',
                 progress: bool = False,
                 chunks: str = 'auto',
                 chunks_maxsize: dict = None,  # None instead of {} to avoid a shared mutable default
                 **kwargs):
        """ Init fetcher

        Parameters
        ----------
        local_ftp: str (optional)
            Path to the local directory where the 'dac' folder is located.
        ds: str (optional)
            Dataset to load: 'phy' or 'ref' or 'bgc'
        errors: str (optional)
            If set to 'raise' (default), a NetCDF4FileNotFoundError is raised when any of the requested
            files cannot be found. If set to 'ignore', missing files are skipped when fetching data.
        cache: bool (optional)
            Cache data or not (default: False)
        cachedir: str (optional)
            Path to cache folder
        dimension: str
            Main dimension of the output dataset. This can be "profile" to retrieve a collection of
            profiles, or "point" (default) to have data as a collection of measurements.
            This can be used to optimise performance.
        parallel: bool (optional)
            Chunk request to use parallel fetching (default: False)
        parallel_method: str (optional)
            Define the parallelization method: ``thread`` (default) or ``process``. The localftp
            fetcher does not accept a :class:`dask.distributed.client.Client` here.
        progress: bool (optional)
            Show a progress bar or not when fetching data.
        chunks: 'auto' or dict of integers (optional)
            Dictionary with request access point as keys and number of chunks to create as values.
            Eg:

                - ``{'wmo': 10}`` will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
                - ``{'lon': 2}`` will create a maximum of 2 chunks along longitude when used with ``Fetch_box``.

        chunks_maxsize: dict (optional)
            Dictionary with request access point as keys and chunk size as values (used as maximum values in
            'auto' chunking).
            Eg: ``{'wmo': 5}`` will create chunks with as many as 5 WMOs each.
        """
        self.cache = cache
        self.cachedir = cachedir
        self.fs = filestore(cache=self.cache, cachedir=self.cachedir)
        self.errors = errors

        if not isinstance(parallel, bool):
            # The parallelization method can also be passed through the 'parallel' argument:
            # e.g. parallel='process' is promoted to parallel=True, parallel_method='process'.
            parallel_method = parallel
            if parallel in ['thread', 'process']:
                parallel = True
        if parallel_method not in ["thread", "process"]:
            raise ValueError("localftp only supports multi-threading and multi-processing ('%s' unknown)" % parallel_method)
        self.parallel = parallel
        self.parallel_method = parallel_method
        self.progress = progress
        self.chunks = chunks
        self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize

        self.definition = 'Local ftp Argo data fetcher'
        self.dataset_id = OPTIONS['dataset'] if ds == '' else ds

        self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
        check_localftp(self.local_ftp, errors='raise')  # Validate local_ftp

        self.init(**kwargs)
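
Putting the options together, here is a hedged end-to-end sketch through argopy's public facade. It assumes a release in which the 'localftp' data source is still shipped; the path and WMO numbers are placeholders:

import argopy

argopy.set_options(local_ftp="/data/argo")  # placeholder path to the local mirror
fetcher = argopy.DataFetcher(
    src='localftp',
    parallel='process',  # string form: enables parallel fetching and selects the method
    progress=True,
    chunks={'wmo': 5},   # at most 5 chunks along WMOs for a Fetch_wmo request
).float([6902746, 6902747])  # placeholder WMO numbers
ds = fetcher.to_xarray()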