def test_getitem_ancillary_variables():
    """Check ancillary-variable handling of ``.cf[...]`` item access.

    Covers three things:

    * ``anc.cf["q"]`` and ``anc.cf["specific_humidity"]`` both return ``q``
      with ``q_error_limit`` and ``q_detection_limit`` attached as coords.
    * Selecting ``q`` from a subset that only contains ``q`` emits a
      ``UserWarning``, and emits nothing when the
      ``warn_on_missing_variables`` option is switched off.
    * ``popds.cf["TEMP"]`` / ``popds.cf["UVEL"]`` do not carry the
      ``ULONG``/``ULAT`` and ``TLONG``/``TLAT`` coordinates respectively.
    """
    # Local import so this block does not depend on the file's import header.
    import warnings

    expected = anc.set_coords(["q_error_limit", "q_detection_limit"])["q"]
    assert_identical(anc.cf["q"], expected)
    assert_identical(anc.cf["specific_humidity"], expected)

    with pytest.warns(UserWarning):
        anc[["q"]].cf["q"]

    # ``pytest.warns(None)`` was deprecated in pytest 7 and is rejected by
    # pytest 8. Escalating every warning to an error asserts "no warnings"
    # and points the traceback at the offending call.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        with cf_xarray.set_options(warn_on_missing_variables=False):
            anc[["q"]].cf["q"]

    for k in ["ULONG", "ULAT"]:
        assert k not in popds.cf["TEMP"].coords

    for k in ["TLONG", "TLAT"]:
        assert k not in popds.cf["UVEL"].coords
def __init__(self, kwargs):
    """Set up a region-based search on an ERDDAP server.

    Parameters
    ----------
    kwargs: dict
        Single dictionary carrying every input. Entries read here:

        * known_server, protocol, server, parallel: optional
            Passed on to the base ``ErddapReader`` initializer. When absent
            they default to ``"ioos"``, ``None``, ``None`` and ``True``.
        * kw: dict
            Required. Space and time search constraints:
            `min_lon`, `max_lon`, `min_lat`, `max_lat`, `min_time`,
            `max_time`.
        * variables: string or list, optional
            Variable names to limit the search to. A single name is wrapped
            in a list. The names must either come from
            `odg.all_variables(server)` for the specific ERDDAP server and
            pass `odg.check_variables(server, variables)`, or, when
            `criteria` is given, all be keys of `criteria`.
        * criteria: dict or str, optional
            Describes how to recognize variables by their name and
            attributes with regular expressions, for use with `cf-xarray`.
            May be a local dictionary or a URL (a string starting with
            ``http``) pointing to a nonlocal gist. Required for running QC
            in Gateway. For example:

            >>> my_custom_criteria = {"salt": {
            ...     "standard_name": "sea_water_salinity$|sea_water_practical_salinity$",
            ...     "name": "(?i)sal$|(?i)s.sea_water_practical_salinity$"}}
        * var_def: dict, optional
            Same keys as `criteria` (criteria can have more); describes QC
            definitions and units (units, fail_span, suspect_span). Not read
            by this initializer. For example:

            >>> var_def = {"salt": {"units": "psu",
            ...     "fail_span": [-10, 60], "suspect_span": [-1, 45]}}
    """
    assert isinstance(kwargs, dict), "input arguments as dictionary"

    # Options understood by the base reader, paired with their fallbacks.
    base_defaults = (
        ("known_server", "ioos"),
        ("protocol", None),
        ("server", None),
        ("parallel", True),
    )
    er_kwargs = {name: kwargs.get(name, fallback) for name, fallback in base_defaults}
    ErddapReader.__init__(self, **er_kwargs)

    search_kw = kwargs["kw"]
    requested = kwargs.get("variables", None)

    self.approach = "region"
    self._stations = None
    self.kw = search_kw

    # Custom criteria, when supplied, are registered with cf-xarray.
    criteria = None
    if "criteria" in kwargs:
        criteria = kwargs["criteria"]
        # A string starting with "http" links to a nonlocal definition.
        if isinstance(criteria, str) and criteria.startswith("http"):
            criteria = odg.return_response(criteria)
        cf_xarray.set_options(custom_criteria=criteria)
    self.criteria = criteria

    # Normalize a lone variable name into a one-element list.
    if requested is not None and not isinstance(requested, list):
        requested = [requested]

    if requested is None:
        self.num_variables = 0
    else:
        uses_nicknames = self.criteria and all(
            name in self.criteria for name in requested
        )
        if uses_nicknames:
            # Translate criteria nicknames into this reader's variable names.
            requested = odg.select_variables(
                self.e.server, self.criteria, requested
            )
        else:
            # Reader-specific names were given directly; validate them.
            odg.check_variables(self.e.server, requested)
        # Remember the count so a later change by the user can be detected.
        self.num_variables = len(requested)

    self.variables = requested
def __init__(self, *args, **kwargs):
    """Collect and validate the inputs for a Gateway search.

    Positional arguments are accepted but not used by this initializer.

    Parameters
    ----------
    kw: dict
        Space and time search constraints: `min_lon`, `max_lon`,
        `min_lat`, `max_lat`, `min_time`, `max_time`.
    approach: string
        Either 'stations' or 'region'. Defaults to 'region' when omitted.
    parallel: boolean, optional
        If True, run with simple parallelization using `multiprocessing`.
        If False, run serially. True by default. Given this way, the same
        value is used for all readers; given inside an individual reader
        dictionary, the value can vary by reader.
    readers: ocean_data_gateway Reader, list of readers, optional
        Use fewer than the full set of readers. For example,
        `readers=odg.erddap`, or to include all by name
        `readers = [odg.ErddapReader, odg.axdsReader, odg.localReader]`.
    erddap: dict, optional
        Reader specifications, e.g. `erddap={'known_server': 'ioos'}`.
        See odg.erddap.ErddapReader for more input options.
    axds: dict, optional
        Reader specifications, e.g. `axds={'axds_type': 'platform2'}`.
        See odg.axds.AxdsReader for more input options.
    local: dict, optional
        Reader specifications, e.g. `local={'filenames': filenames}` for a
        list of filenames. See odg.local.LocalReader for more input
        options.
    criteria: dict, str, optional
        Describes how to recognize variables by their name and attributes
        with regular expressions, for use with `cf-xarray`. May be a local
        dictionary or a URL (a string starting with ``http``) pointing to
        a nonlocal gist. Required for running QC in Gateway. For example:

        >>> my_custom_criteria = {"salt": {
        ...     "standard_name": "sea_water_salinity$|sea_water_practical_salinity$",
        ...     "name": "(?i)sal$|(?i)s.sea_water_practical_salinity$"}}
    var_def: dict, optional
        Same keys as `criteria` (criteria can have more); describes QC
        definitions and units. It should include the variable units,
        fail_span, and suspect_span. May also be given as a URL. For
        example:

        >>> var_def = {"salt": {"units": "psu",
        ...     "fail_span": [-10, 60], "suspect_span": [-1, 45]}}

    Notes
    -----
    To select search variables, give the variable names to each reader
    individually, e.g. `erddap={'variables': [list of variables]}`, making
    sure the names are correct for that reader (see the individual reader
    docs). Alternatively, supply `criteria` and use its nicknames as the
    variable names; those are then given generally rather than to an
    individual reader.

    Keyword arguments that are not specific to one reader ("approach",
    "parallel", "kw", ...) are gathered in `self.kwargs_all`. The full,
    unfiltered keyword arguments are kept in `self.kwargs`.
    """

    def _load_if_url(value):
        # A string starting with "http" links to a nonlocal definition.
        if isinstance(value, str) and value[:4] == "http":
            return odg.return_response(value)
        return value

    # Reject any keyword the Gateway does not know about.
    unknown_keys = set(kwargs) - set(odg.keys_kwargs)
    assert len(unknown_keys) == 0, f"keys into Gateway {unknown_keys} are unknown."

    # Everything except the per-reader dictionaries is "general" input.
    reader_keys = {"erddap", "axds", "local"}
    self.kwargs_all = {key: kwargs[key] for key in set(kwargs) - reader_keys}

    # Region is the default approach.
    self.kwargs_all.setdefault("approach", "region")
    assert self.kwargs_all["approach"] in (
        "region",
        "stations",
    ), '`approach` has to be "region" or "stations"'

    # Custom criteria, when supplied, are registered with cf-xarray.
    if "criteria" in self.kwargs_all:
        criteria = _load_if_url(self.kwargs_all["criteria"])
        cf_xarray.set_options(custom_criteria=criteria)
        self.criteria = criteria
    else:
        self.criteria = None

    # User-supplied variable definitions for QC.
    if "var_def" in self.kwargs_all:
        self.var_def = _load_if_url(self.kwargs_all["var_def"])
    else:
        self.var_def = None

    # When both are given, every var_def key must also be a criteria key.
    if self.criteria and self.var_def:
        assert all(
            elem in self.criteria for elem in self.var_def
        ), "All variable keys in `var_def` must be available in `criteria`."

    self.kwargs = kwargs

    # Bare attribute access kept from the original; presumably `sources` is
    # a property evaluated here for its side effects -- TODO confirm.
    self.sources

    self.store = {}
def test_custom_criteria():
    """Check that ``custom_criteria`` drives ``.cf[...]`` lookups.

    The criteria are tried as a single dict, as a list of dicts and as a
    tuple of dicts. Every ``set_options`` call is used as a context manager
    so the option does not stay installed after the test finishes.
    """
    my_custom_criteria = {
        "ssh": {
            "standard_name": "sea_surface_elev*|sea_surface_height",
            "name": "sea_surface_elevation$",  # variable name
        },
        "salt": {
            "standard_name": "salinity",
            "name": "sal*",
        },
        "wind_speed": {
            "standard_name": "wind_speed$",
        },
    }
    my_custom_criteria2 = {"temp": {"name": "temperature"}}
    my_custom_criteria_list = [my_custom_criteria, my_custom_criteria2]
    my_custom_criteria_tuple = (my_custom_criteria, my_custom_criteria2)

    # Scope the option to this block: a bare ``set_options(...)`` call would
    # leave the custom criteria active for whatever runs after this test.
    with cf_xarray.set_options(custom_criteria=my_custom_criteria):
        # Match by name regex match
        ds = xr.Dataset()
        ds["salinity"] = ("dim", np.arange(10))
        assert_identical(ds.cf["salt"], ds["salinity"])

        # Match by standard_name regex match
        ds = xr.Dataset()
        ds["elev"] = (
            "dim",
            np.arange(10),
            {"standard_name": "sea_surface_elevBLAH"},
        )
        assert_identical(ds.cf["ssh"], ds["elev"])

        # Match by standard_name exact match
        ds = xr.Dataset()
        ds["salinity"] = ("dim", np.arange(10), {"standard_name": "salinity"})
        assert_identical(ds.cf["salt"], ds["salinity"])

        # If not exact name, won't match
        ds = xr.Dataset()
        ds["sea_surface_elevation123"] = ("dim", np.arange(10))
        # Since this will not match, this should error
        with pytest.raises(KeyError):
            ds.cf["ssh"]

        # will select only one variable here since exact match
        ds = xr.Dataset()
        ds["winds"] = ("dim", np.arange(10), {"standard_name": "wind_speed"})
        ds["gusts"] = (
            "dim",
            np.arange(10),
            {"standard_name": "wind_speed_of_gust"},
        )
        assert_identical(ds.cf["wind_speed"], ds["winds"])

        # Two variables qualify as "ssh": one through its name, the other
        # through its standard_name.
        ds = xr.Dataset()
        ds["sea_surface_elevation"] = ("dim", np.arange(10))
        ds["sea_surface_height"] = (
            "dim",
            np.arange(10),
            {"standard_name": "sea_surface_elevBLAH"},
        )
        # Since there are two variables, this should error
        with pytest.raises(KeyError):
            ds.cf["ssh"]
        # But the following should work instead given the two ssh variables
        assert_identical(
            ds.cf[["ssh"]], ds[["sea_surface_elevation", "sea_surface_height"]]
        )

    # test criteria list of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_list):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])

    # test criteria tuple of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_tuple):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])
def test_options():
    """Passing an option name that does not exist must raise ``ValueError``."""
    bogus_option = {"DISPLAY_WIDTH": 80}
    with pytest.raises(ValueError):
        cfxr.set_options(**bogus_option)