Esempio n. 1
0
def test_getitem_ancillary_variables():
    """Check ancillary-variable handling when indexing through ``.cf``.

    Uses the datasets ``anc`` and ``popds``, which are defined outside this
    function. Verifies that:

    * ``anc.cf["q"]`` and ``anc.cf["specific_humidity"]`` both equal ``q``
      with ``q_error_limit`` and ``q_detection_limit`` set as coordinates;
    * a ``UserWarning`` is emitted when ``q`` is looked up on a subset that
      holds only ``q``;
    * no warning at all is emitted for the same lookup when
      ``warn_on_missing_variables=False``;
    * ``popds.cf["TEMP"]`` does not carry ``ULONG``/``ULAT`` and
      ``popds.cf["UVEL"]`` does not carry ``TLONG``/``TLAT`` as coordinates.
    """
    import warnings

    expected = anc.set_coords(["q_error_limit", "q_detection_limit"])["q"]
    assert_identical(anc.cf["q"], expected)
    assert_identical(anc.cf["specific_humidity"], expected)

    with pytest.warns(UserWarning):
        anc[["q"]].cf["q"]

    # ``pytest.warns(None)`` was deprecated in pytest 7 and is rejected by
    # later releases. Escalating every warning to an exception is the
    # replacement for "assert that nothing warns": any warning raised inside
    # the block now fails the test with a traceback.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        with cf_xarray.set_options(warn_on_missing_variables=False):
            anc[["q"]].cf["q"]

    for k in ["ULONG", "ULAT"]:
        assert k not in popds.cf["TEMP"].coords

    for k in ["TLONG", "TLAT"]:
        assert k not in popds.cf["UVEL"].coords
Esempio n. 2
0
    def __init__(self, kwargs):
        """
        Configure a region-based search on top of ``ErddapReader``.

        Parameters
        ----------
        kwargs: dict
            Single dictionary carrying every input. The entries
            `known_server` (default "ioos"), `protocol` (default None),
            `server` (default None) and `parallel` (default True) are
            forwarded to ``ErddapReader.__init__``. The remaining entries
            are:

            * kw: dict
              Required. Contains space and time search constraints:
              `min_lon`, `max_lon`, `min_lat`, `max_lat`, `min_time`,
              `max_time`. It is stored on `self.kw` as given.
            * variables: string or list, optional
              Variable names if you want to limit the search to those. A
              single non-list value is wrapped in a list. The name or names
              must be from the list available in
              `odg.all_variables(server)` for the specific ERDDAP server
              and pass the check in `odg.check_variables(server, variables)`.

              Alternatively, if the user inputs criteria, variables can be
              a list of the keys from criteria; they are then translated
              with `odg.select_variables`.
            * criteria: dict or str, optional
              A dictionary describing how to recognize variables by their
              name and attributes with regular expressions to be used with
              `cf-xarray`. It can be local or a URL pointing to a nonlocal
              gist. This is required for running QC in Gateway. For example:
              >>> my_custom_criteria = {"salt": {
              ... "standard_name": "sea_water_salinity$|sea_water_practical_salinity$",
              ... "name": "(?i)sal$|(?i)s.sea_water_practical_salinity$"}}
            * var_def: dict, optional
              A dictionary with the same keys as criteria (criteria can
              have more) that describes QC definitions and units. It should
              include the variable units, fail_span, and suspect_span.
              NOTE(review): this constructor does not read `var_def`.
              For example:
              >>> var_def = {"salt": {"units": "psu",
              ... "fail_span": [-10, 60], "suspect_span": [-1, 45]}}
        """
        assert isinstance(kwargs, dict), "input arguments as dictionary"

        # Options understood by the base reader, paired with the value used
        # when the caller leaves them out.
        base_defaults = (
            ("known_server", "ioos"),
            ("protocol", None),
            ("server", None),
            ("parallel", True),
        )
        ErddapReader.__init__(
            self,
            **{name: kwargs.get(name, default) for name, default in base_defaults},
        )

        region = kwargs["kw"]
        requested = kwargs.get("variables", None)

        self.approach = "region"
        self._stations = None

        # The search constraints are stored as given; nothing is validated
        # here.
        self.kw = region

        # Custom criteria, when supplied, are registered with cf-xarray.
        if "criteria" not in kwargs:
            self.criteria = None
        else:
            custom = kwargs["criteria"]
            # A string starting with "http" is a link to a nonlocal
            # definition that has to be fetched first.
            if isinstance(custom, str) and custom.startswith("http"):
                custom = odg.return_response(custom)
            cf_xarray.set_options(custom_criteria=custom)
            self.criteria = custom

        # No variable filter requested.
        if requested is None:
            self.num_variables = 0
            self.variables = None
            return

        if not isinstance(requested, list):
            requested = [requested]

        # When every requested name is a key of the criteria, the names are
        # nicknames and must be translated to reader-specific variable
        # names. Otherwise they are taken as reader variable names and only
        # checked.
        uses_nicknames = self.criteria and all(
            name in self.criteria for name in requested
        )
        if uses_nicknames:
            requested = odg.select_variables(
                self.e.server, self.criteria, requested
            )
        else:
            odg.check_variables(self.e.server, requested)

        # Record the number of variables so that a later change by the user
        # can be detected by comparison.
        self.num_variables = len(requested)
        self.variables = requested
Esempio n. 3
0
    def __init__(self, *args, **kwargs):
        """
        Validate the keyword inputs and set up the gateway state.

        Parameters
        ----------
        kw: dict
            Contains space and time search constraints: `min_lon`, `max_lon`,
            `min_lat`, `max_lat`, `min_time`, `max_time`.
        approach: string
            approach is defined as 'stations' or 'region' depending on user
            choice. Set to 'region' when not given.
        parallel: boolean, optional
            If True, run with simple parallelization using `multiprocessing`.
            If False, run serially. True by default. If input in this manner,
            the same value is used for all readers. If input by individual
            reader dictionary, the value can vary by reader.
        readers: ocean_data_gateway Reader, list of readers, optional
            Use this to use fewer than the full set of readers. For example,
            `readers=odg.erddap` or to specifically include all by name
            `readers = [odg.ErddapReader, odg.axdsReader, odg.localReader]`.
        erddap: dict, optional
            Dictionary of reader specifications. For example,
            `erddap={'known_server': 'ioos'}`. See odg.erddap.ErddapReader for
            more input options.
        axds: dict, optional
            Dictionary of reader specifications. For example,
            `axds={'axds_type': 'platform2'}`. See odg.axds.AxdsReader for
            more input options.
        local: dict, optional
            Dictionary of reader specifications. For example,
            `local={'filenames': filenames}` for a list of filenames.
            See odg.local.LocalReader for more input options.
        criteria: dict, str, optional
            A dictionary describing how to recognize variables by their name
            and attributes with regular expressions to be used with
            `cf-xarray`. It can be local or a URL pointing to a nonlocal
            gist. This is required for running QC in Gateway. For example:
            >>> my_custom_criteria = {"salt": {
            ... "standard_name": "sea_water_salinity$|sea_water_practical_salinity$",
            ... "name": "(?i)sal$|(?i)s.sea_water_practical_salinity$"}}
        var_def: dict, str, optional
            A dictionary with the same keys as criteria (criteria can have
            more) that describes QC definitions and units. It should include
            the variable units, fail_span, and suspect_span. Like `criteria`,
            a string starting with "http" is fetched. For example:
            >>> var_def = {"salt": {"units": "psu",
            ... "fail_span": [-10, 60], "suspect_span": [-1, 45]}}

        Notes
        -----
        Positional arguments are accepted but not used by this constructor.

        To select search variables, input the variable names to each reader
        individually in the format `erddap={'variables': [list of variables]}`.
        Make sure that the variable names are correct for each individual
        reader. Check individual reader docs for more information.

        Alternatively, the user can input `criteria` and then input as
        variables the nicknames provided in `criteria` for variable names.
        These should then be input generally, not to an individual reader.

        Keyword arguments other than "erddap", "axds" and "local" are
        collected in `self.kwargs_all`. These may include "approach",
        "parallel", "kw" containing the time and space region to search
        for, etc. The complete keyword dictionary, reader-specific entries
        included, is kept on `self.kwargs`.
        """

        def _resolve(spec):
            # A string starting with "http" is a link to a nonlocal
            # definition; fetch it. Anything else is used as given.
            if isinstance(spec, str) and spec.startswith("http"):
                return odg.return_response(spec)
            return spec

        # Only keys listed in odg.keys_kwargs are accepted.
        unknown_keys = set(kwargs) - set(odg.keys_kwargs)
        assert not unknown_keys, f"keys into Gateway {unknown_keys} are unknown."

        # General (not reader-specific) inputs.
        reader_keys = ("erddap", "axds", "local")
        self.kwargs_all = {
            key: kwargs[key] for key in set(kwargs) - set(reader_keys)
        }

        # Default approach is region.
        self.kwargs_all.setdefault("approach", "region")
        assert self.kwargs_all["approach"] in ("region", "stations"), (
            '`approach` has to be "region" or "stations"'
        )

        # Custom criteria, when supplied, are registered with cf-xarray.
        if "criteria" not in self.kwargs_all:
            self.criteria = None
        else:
            custom = _resolve(self.kwargs_all["criteria"])
            cf_xarray.set_options(custom_criteria=custom)
            self.criteria = custom

        # User-input variable definitions for QC.
        if "var_def" not in self.kwargs_all:
            self.var_def = None
        else:
            self.var_def = _resolve(self.kwargs_all["var_def"])

        # When both are supplied, every key of var_def has to be known to
        # criteria.
        if self.criteria and self.var_def:
            assert all(key in self.criteria for key in self.var_def), (
                "All variable keys in `var_def` must be available in `criteria`."
            )

        self.kwargs = kwargs
        # NOTE(review): bare attribute access kept from the original;
        # presumably `sources` is a property whose first access builds the
        # readers. Confirm against the class definition.
        self.sources

        self.store = {}
Esempio n. 4
0
def test_custom_criteria():
    """Exercise ``custom_criteria`` lookups through the ``.cf`` accessor.

    Covers matching by variable name and by ``standard_name`` (regex and
    exact), the ``KeyError`` raised when nothing or more than one variable
    matches a scalar key, list-style indexing returning every match, and
    criteria supplied as a list or tuple of dictionaries.
    """
    my_custom_criteria = {
        "ssh": {
            "standard_name": "sea_surface_elev*|sea_surface_height",
            "name": "sea_surface_elevation$",  # variable name
        },
        "salt": {
            "standard_name": "salinity",
            "name": "sal*",
        },
        "wind_speed": {
            "standard_name": "wind_speed$",
        },
    }
    my_custom_criteria2 = {"temp": {"name": "temperature"}}
    my_custom_criteria_list = [my_custom_criteria, my_custom_criteria2]
    my_custom_criteria_tuple = (my_custom_criteria, my_custom_criteria2)

    # Use the context-manager form so the criteria stay scoped to this test.
    # A bare ``set_options(...)`` call leaves them set for every test that
    # runs afterwards.
    with cf_xarray.set_options(custom_criteria=my_custom_criteria):
        # Match by name regex match
        ds = xr.Dataset()
        ds["salinity"] = ("dim", np.arange(10))
        assert_identical(ds.cf["salt"], ds["salinity"])

        # Match by standard_name regex match
        ds = xr.Dataset()
        ds["elev"] = (
            "dim",
            np.arange(10),
            {"standard_name": "sea_surface_elevBLAH"},
        )
        assert_identical(ds.cf["ssh"], ds["elev"])

        # Match by standard_name exact match
        ds = xr.Dataset()
        ds["salinity"] = ("dim", np.arange(10), {"standard_name": "salinity"})
        assert_identical(ds.cf["salt"], ds["salinity"])

        # If not exact name, won't match
        ds = xr.Dataset()
        ds["sea_surface_elevation123"] = ("dim", np.arange(10))
        # Since this will not match, this should error
        with pytest.raises(KeyError):
            ds.cf["ssh"]

        # will select only one variable here since exact match
        ds = xr.Dataset()
        ds["winds"] = ("dim", np.arange(10), {"standard_name": "wind_speed"})
        ds["gusts"] = (
            "dim",
            np.arange(10),
            {"standard_name": "wind_speed_of_gust"},
        )
        assert_identical(ds.cf["wind_speed"], ds["winds"])

        # One variable matches by exact name, the other by standard_name
        ds = xr.Dataset()
        ds["sea_surface_elevation"] = ("dim", np.arange(10))
        ds["sea_surface_height"] = (
            "dim",
            np.arange(10),
            {"standard_name": "sea_surface_elevBLAH"},
        )
        # Since there are two variables, this should error
        with pytest.raises(KeyError):
            ds.cf["ssh"]
        # But the following should work instead given the two ssh variables
        assert_identical(
            ds.cf[["ssh"]], ds[["sea_surface_elevation", "sea_surface_height"]]
        )

    # test criteria list of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_list):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])

    # test criteria tuple of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_tuple):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])
Esempio n. 5
0
def test_options():
    """Passing a nonexistent option name to ``set_options`` must raise ``ValueError``."""
    bogus_options = {"DISPLAY_WIDTH": 80}

    with pytest.raises(ValueError):
        cfxr.set_options(**bogus_options)