def test_file_cache_maxsize():
    with pytest.raises(ValueError):
        xarray.set_options(file_cache_maxsize=0)
    original_size = FILE_CACHE.maxsize
    with xarray.set_options(file_cache_maxsize=123):
        assert FILE_CACHE.maxsize == 123
    assert FILE_CACHE.maxsize == original_size
def test_nested_options():
    original = OPTIONS['display_width']
    with xarray.set_options(display_width=1):
        assert OPTIONS['display_width'] == 1
        with xarray.set_options(display_width=2):
            assert OPTIONS['display_width'] == 2
        assert OPTIONS['display_width'] == 1
    assert OPTIONS['display_width'] == original
def test_keep_attrs():
    with pytest.raises(ValueError):
        xarray.set_options(keep_attrs='invalid_str')
    with xarray.set_options(keep_attrs=True):
        assert OPTIONS['keep_attrs']
    with xarray.set_options(keep_attrs=False):
        assert not OPTIONS['keep_attrs']
    with xarray.set_options(keep_attrs='default'):
        assert _get_keep_attrs(default=True)
        assert not _get_keep_attrs(default=False)
def test_dataset_attr_retention(self):
    # Use .mean() for all tests: a typical reduction operation
    ds = create_test_dataset_attrs()
    original_attrs = ds.attrs

    # Test default behaviour
    result = ds.mean()
    assert result.attrs == {}

    with xarray.set_options(keep_attrs='default'):
        result = ds.mean()
        assert result.attrs == {}

    with xarray.set_options(keep_attrs=True):
        result = ds.mean()
        assert result.attrs == original_attrs

    with xarray.set_options(keep_attrs=False):
        result = ds.mean()
        assert result.attrs == {}
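# A minimal, self-contained sketch (not part of the test suite above) of the
# behaviour exercised by the keep_attrs tests: by default a reduction drops
# attrs, while set_options(keep_attrs=True) preserves them for the duration of
# the context, and the previous setting is restored on exit.
import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0], dims="x", attrs={"units": "K"})
assert da.mean().attrs == {}                      # default: reductions drop attrs
with xr.set_options(keep_attrs=True):
    assert da.mean().attrs == {"units": "K"}      # attrs survive inside the block
assert da.mean().attrs == {}                      # option restored afterwards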
def test_groupby_attr_retention(self):
    da = xarray.DataArray([1, 2, 3], [('x', [1, 1, 2])])
    da.attrs = {'attr1': 5, 'attr2': 'history',
                'attr3': {'nested': 'more_info'}}
    original_attrs = da.attrs

    # Test default behaviour
    result = da.groupby('x').sum(keep_attrs=True)
    assert result.attrs == original_attrs

    with xarray.set_options(keep_attrs='default'):
        result = da.groupby('x').sum(keep_attrs=True)
        assert result.attrs == original_attrs

    with xarray.set_options(keep_attrs=True):
        result1 = da.groupby('x')
        result = result1.sum()
        assert result.attrs == original_attrs

    with xarray.set_options(keep_attrs=False):
        result = da.groupby('x').sum()
        assert result.attrs == {}
def test_display_width():
    with pytest.raises(ValueError):
        xarray.set_options(display_width=0)
    with pytest.raises(ValueError):
        xarray.set_options(display_width=-10)
    with pytest.raises(ValueError):
        xarray.set_options(display_width=3.5)
def test_alignment():
    ds1 = xr.Dataset({'a': ('x', [1, 2])}, {'x': [0, 1]})
    ds2 = xr.Dataset({'a': ('x', [2, 3]), 'b': 4}, {'x': [1, 2]})

    actual = np.add(ds1, ds2)
    expected = xr.Dataset({'a': ('x', [4])}, {'x': [1]})
    assert_identical_(actual, expected)

    with xr.set_options(arithmetic_join='outer'):
        actual = np.add(ds1, ds2)
        expected = xr.Dataset({'a': ('x', [np.nan, 4, np.nan]), 'b': np.nan},
                              coords={'x': [0, 1, 2]})
        assert_identical_(actual, expected)
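# Hedged, self-contained restatement of what test_alignment checks: the default
# arithmetic_join='inner' keeps only shared coordinates, while 'outer' unions
# them and fills the gaps with NaN.
import numpy as np
import xarray as xr

ds1 = xr.Dataset({"a": ("x", [1, 2])}, {"x": [0, 1]})
ds2 = xr.Dataset({"a": ("x", [2, 3])}, {"x": [1, 2]})

inner = ds1 + ds2                      # default join: only x=1 survives
assert list(inner.x.values) == [1]

with xr.set_options(arithmetic_join="outer"):
    outer = ds1 + ds2                  # union of coordinates, NaN where missing
assert list(outer.x.values) == [0, 1, 2]
assert np.isnan(outer.a.sel(x=0))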
def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
    days = [1., 2., 3.]
    da = DataArray(days, coords=[days], dims=['time'], name='test')
    ds = da.to_dataset()

    for v in ['test', 'time']:
        ds[v].attrs['units'] = 'days since 2001-01-01'
        ds[v].attrs['calendar'] = calendar

    if (not has_cftime and enable_cftimeindex and
            calendar not in coding.times._STANDARD_CALENDARS):
        with pytest.raises(ValueError):
            with set_options(enable_cftimeindex=enable_cftimeindex):
                ds = decode_cf(ds)
    else:
        with set_options(enable_cftimeindex=enable_cftimeindex):
            ds = decode_cf(ds)

        if (enable_cftimeindex and
                calendar not in coding.times._STANDARD_CALENDARS):
            assert ds.test.dtype == np.dtype('O')
        else:
            assert ds.test.dtype == np.dtype('M8[ns]')
def test_concat_cftimeindex(date_type, enable_cftimeindex):
    with xr.set_options(enable_cftimeindex=enable_cftimeindex):
        da1 = xr.DataArray(
            [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
            dims=['time'])
        da2 = xr.DataArray(
            [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]],
            dims=['time'])
        da = xr.concat([da1, da2], dim='time')

    if enable_cftimeindex and has_cftime:
        assert isinstance(da.indexes['time'], CFTimeIndex)
    else:
        assert isinstance(da.indexes['time'], pd.Index)
        assert not isinstance(da.indexes['time'], CFTimeIndex)
def test_repr_file_collapsed(tmp_path):
    arr = xr.DataArray(np.arange(300), dims="test")
    arr.to_netcdf(tmp_path / "test.nc", engine="netcdf4")

    with xr.open_dataarray(tmp_path / "test.nc") as arr, xr.set_options(
        display_expand_data=False
    ):
        actual = formatting.array_repr(arr)
        expected = dedent(
            """\
            <xarray.DataArray (test: 300)>
            array([  0,   1,   2, ..., 297, 298, 299])
            Dimensions without coordinates: test"""
        )

        assert actual == expected
def apply_correction(
    x: xr.DataArray, factor: xr.DataArray, kind: Optional[str] = None
) -> xr.DataArray:
    """Apply the additive or multiplicative correction/adjustment factors.

    If kind is not given, default to the one stored in the "kind" attribute of factor.
    """
    kind = kind or factor.get("kind", None)
    with xr.set_options(keep_attrs=True):
        if kind == ADDITIVE:
            out = x + factor
        elif kind == MULTIPLICATIVE:
            out = x * factor
        else:
            raise ValueError
    return out
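# Hypothetical usage sketch for apply_correction above. ADDITIVE is assumed to
# be the module-level kind marker the function compares against; the toy inputs
# are illustrative only. With keep_attrs=True the attrs of `x` are preserved.
import xarray as xr

x = xr.DataArray([280.0, 285.0], dims="time", attrs={"units": "K"})
factor = xr.DataArray([1.5, -0.5], dims="time")

corrected = apply_correction(x, factor, kind=ADDITIVE)  # x + factor
assert corrected.attrs == {"units": "K"}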
def test_repr_html(self, converted_zarr):
    zarr_path_string = str(converted_zarr.absolute())
    ed = EchoData(converted_raw_path=converted_zarr)
    assert hasattr(ed, "_repr_html_")
    html_repr = ed._repr_html_().strip()
    assert (
        f"""<div class="xr-obj-type">EchoData: standardized raw data from {zarr_path_string}</div>"""
        in html_repr
    )

    with xr.set_options(display_style="text"):
        html_fallback = ed._repr_html_().strip()
        assert html_fallback.startswith("<pre>EchoData") and html_fallback.endswith(
            "</pre>"
        )
def _to_quarter(
    freq: str,
    pr: Optional[xarray.DataArray] = None,
    tas: Optional[xarray.DataArray] = None,
) -> xarray.DataArray:
    """Convert daily, weekly or monthly time series to quarterly time series according to ANUCLIM specifications."""
    if freq.upper().startswith("D"):
        if tas is not None:
            tas = tg_mean(tas, freq="7D")
        if pr is not None:
            pr = precip_accumulation(pr, freq="7D")
            pr.attrs["units"] = "mm/week"
        freq = "W"

    if freq.upper().startswith("W"):
        window = 13
        u = units.week
    elif freq.upper().startswith("M"):
        window = 3
        u = units.month
    else:
        raise NotImplementedError(
            f'Unknown input time frequency "{freq}": must be one of "D", "W" or "M".'
        )

    if tas is not None:
        tas = ensure_chunk_size(tas, time=np.ceil(window / 2))
    if pr is not None:
        pr = ensure_chunk_size(pr, time=np.ceil(window / 2))

    with xarray.set_options(keep_attrs=True):
        if pr is not None:
            pr = pint_multiply(pr, 1 * u, "mm")
            out = pr.rolling(time=window, center=False).sum()
            out.attrs = pr.attrs
            out.attrs["units"] = "mm"

        if tas is not None:
            out = tas.rolling(time=window, center=False).mean(skipna=False)
            out.attrs = tas.attrs

    out = ensure_chunk_size(out, time=-1)
    return out
def prcptot_warmcold_quarter(
    pr: xarray.DataArray,
    tas: xarray.DataArray,
    op: str = None,
    src_timestep: str = None,
    freq: str = "YS",
) -> xarray.DataArray:
    r"""ANUCLIM Total precipitation of warmest/coldest quarter.

    The warmest (or coldest) quarter of the year is determined, and the total
    precipitation of this period is calculated. If the input data frequency is
    daily ("D") or weekly ("W"), quarters are defined as 13-week periods,
    otherwise as 3 months.

    Parameters
    ----------
    pr : xarray.DataArray
        Total precipitation rate at daily, weekly, or monthly frequency.
    tas : xarray.DataArray
        Mean temperature [℃] or [K] at daily, weekly, or monthly frequency.
    op : str
        Operation to perform: 'warmest' calculate for the warmest quarter;
        'coldest' calculate for the coldest quarter.
    src_timestep : {'D', 'W', 'M'}
        Input data time frequency - one of daily, weekly or monthly.
    freq : str
        Resampling frequency; defaults to "YS".

    Returns
    -------
    xarray.DataArray
        Total precipitation values of the warmest/coldest quarter of each year.

    Notes
    -----
    According to the ANUCLIM user-guide
    https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6), input values
    should be at a weekly (or monthly) frequency. However, the xclim.indices
    implementation here will calculate the result with input data at daily
    frequency as well. As such, weekly or monthly input values, if desired,
    should be calculated prior to calling the function.
    """
    # determine input data frequency
    tas_qrt = _to_quarter(src_timestep, tas=tas)
    pr_qrt = _to_quarter(src_timestep, pr=pr)

    xr_op = _xr_argops[op]
    with xarray.set_options(keep_attrs=True):
        out = _from_other_arg(criteria=tas_qrt, output=pr_qrt, op=xr_op, freq=freq)
        out.attrs = pr_qrt.attrs
    return out
def _add_nudging_tendencies(merged: xr.Dataset):
    with xr.set_options(keep_attrs=True):
        Q1 = merged.Q1 + merged.air_temperature_tendency_due_to_nudging
        Q2 = merged.Q2 + merged.specific_humidity_tendency_due_to_nudging
    Q1.attrs.update({
        "long_name": merged.Q1.attrs.get("long_name") + " plus dynamics nudging tendency",
        "description": merged.Q1.attrs.get("description") + " + dynamics nudging tendency",
    })
    Q2.attrs.update({
        "long_name": merged.Q2.attrs.get("long_name") + " plus dynamics nudging tendency",
        "description": merged.Q2.attrs.get("description") + " + dynamics nudging tendency",
    })
    return Q1, Q2
def tg_mean_wetdry_quarter(
    tas: xarray.DataArray,
    pr: xarray.DataArray,
    op: str = None,
    freq: str = "YS",
) -> xarray.DataArray:
    r"""ANUCLIM Mean temperature of wettest/driest quarter.

    The wettest (or driest) quarter of the year is determined, and the mean
    temperature of this period is calculated. If the input data frequency is
    daily ("D") or weekly ("W"), quarters are defined as 13-week periods,
    otherwise as 3 months.

    Parameters
    ----------
    tas : xarray.DataArray
        Mean temperature at daily, weekly, or monthly frequency.
    pr : xarray.DataArray
        Total precipitation rate at daily, weekly, or monthly frequency.
    op : {'wettest', 'driest'}
        Operation to perform: 'wettest' calculate for the wettest quarter;
        'driest' calculate for the driest quarter.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray, [same as tas]
        Mean temperature of {op} quarter.

    Notes
    -----
    According to the ANUCLIM user-guide
    https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6), input values
    should be at a weekly (or monthly) frequency. However, the xclim.indices
    implementation here will calculate the result with input data at daily
    frequency as well. As such, weekly or monthly input values, if desired,
    should be calculated prior to calling the function.
    """
    tas_qrt = _to_quarter(tas=tas)
    pr_qrt = _to_quarter(pr=pr)

    xr_op = _xr_argops[op]
    with xarray.set_options(keep_attrs=True):
        out = _from_other_arg(criteria=pr_qrt, output=tas_qrt, op=xr_op, freq=freq)
        out.attrs = tas.attrs
    return out
def _downsample_only(ds: xr.Dataset, freq_label: str, method: str) -> xr.Dataset:
    """Resample in time, only if given freq_label is lower frequency than time
    sampling of given dataset ds"""
    ds_freq = ds.time.values[1] - ds.time.values[0]
    if ds_freq < pd.to_timedelta(freq_label):
        resampled = ds.resample(time=freq_label, label="right")
        if method == "nearest":
            return resampled.nearest()
        elif method == "mean":
            with xr.set_options(keep_attrs=True):
                return resampled.mean()
        else:
            raise ValueError(f"Don't know how to resample with method={method}.")
    else:
        return ds
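# Hedged, self-contained sketch of _downsample_only with toy hourly data:
# coarsening to 3-hourly means keeps attrs (via keep_attrs=True), while asking
# for a finer frequency than the data provides returns the dataset unchanged.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", periods=24, freq="1H")
ds = xr.Dataset({"t": ("time", np.arange(24.0))}, coords={"time": time})
ds["t"].attrs["units"] = "K"

coarse = _downsample_only(ds, freq_label="3H", method="mean")
assert coarse.sizes["time"] == 8
assert coarse["t"].attrs["units"] == "K"

same = _downsample_only(ds, freq_label="30min", method="mean")
assert same.identical(ds)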
def _calc_ds_diurnal_cycle(ds):
    """
    Calculates the diurnal cycle for all variables. Expects time dimension
    and longitude variable "lon".
    """
    local_time = vcm.local_time(ds, time="time", lon_var="lon")
    local_time.attrs = {"long_name": "local time", "units": "hour"}
    local_time = np.floor(local_time)  # equivalent to hourly binning
    ds["local_time"] = local_time
    diurnal_cycles = xr.Dataset()
    for var in ds.data_vars:
        with xr.set_options(keep_attrs=True):
            diurnal_cycles[var] = (
                ds[[var, "local_time"]].groupby("local_time").mean()[var].load()
            )
    return diurnal_cycles
def season_clim(ds, calendar='standard'):
    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),
                                coords=[ds.time], name='month_length')

    # Calculate the weights by grouping by 'time.season'
    weights = (month_length.groupby('time.season') /
               month_length.groupby('time.season').sum())

    # Test that the sum of the weights for each season is 1.0
    np.testing.assert_allclose(weights.groupby('time.season').sum().values,
                               np.ones(4))

    # Calculate the weighted average
    with xr.set_options(keep_attrs=True):
        return (ds * weights).groupby('time.season').sum(dim='time', skipna=False)
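# Hedged, self-contained illustration of the weighting idea behind season_clim,
# using pandas' days_in_month instead of the project's get_dpm helper: month
# lengths are normalized within each season so the weights sum to one.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2000-12-31", freq="MS")
month_length = xr.DataArray(time.days_in_month, coords={"time": time}, dims="time")
weights = (month_length.groupby("time.season") /
           month_length.groupby("time.season").sum())
np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4))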
def _mean_bias_removal_quick(hind, bias, dim):
    """Quick removal of mean bias over all initializations.

    Args:
        hind (xr.object): hindcast.
        bias (xr.object): bias.
        dim (str): Time dimension name in bias.

    Returns:
        xr.object: bias removed hind
    """
    with xr.set_options(keep_attrs=True):
        bias_removed_hind = (
            hind.groupby(f"{dim}.dayofyear") - bias.groupby(f"{dim}.dayofyear").mean()
        )
    bias_removed_hind.attrs = hind.attrs
    return bias_removed_hind
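# Hedged, self-contained sketch of the day-of-year bias removal pattern used by
# _mean_bias_removal_quick, with toy daily data standing in for a real hindcast.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", periods=730, freq="D")
hind = xr.DataArray(np.ones(730), coords={"init": time}, dims="init",
                    attrs={"units": "K"})
bias = xr.DataArray(np.full(730, 0.5), coords={"init": time}, dims="init")

debiased = _mean_bias_removal_quick(hind, bias, dim="init")
np.testing.assert_allclose(debiased, 0.5)   # constant bias removed everywhere
assert debiased.attrs == hind.attrs         # attrs copied back from the hindcast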
def test_adapt_freq(use_dask):
    time = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    prvals = np.random.randint(0, 100, size=(time.size, 3))
    pr = xr.DataArray(
        prvals,
        coords={"time": time, "lat": [0, 1, 2]},
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )

    if use_dask:
        pr = pr.chunk({"lat": 1})
    group = Grouper("time.month")
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=group)

    # Where the input is considered zero
    input_zeros = sim_ad.where(prsim <= 1)

    # The proportion of corrected values (time.size * 3 * 0.2 is the theoretical
    # number of values under 1 in prsim)
    dP0_out = (input_zeros > 1).sum() / (time.size * 3 * 0.2)
    np.testing.assert_allclose(dP0_out, 0.5, atol=0.1)

    # Assert that corrected values were generated in the range ]1, 20 + tol[
    corrected = (
        input_zeros.where(input_zeros > 1)
        .stack(flat=["lat", "time"])
        .reset_index("flat")
        .dropna("flat")
    )
    assert ((corrected < 20.1) & (corrected > 1)).all()

    # Assert that non-corrected values are untouched
    # Again we add a 0.5 tol because of randomness.
    xr.testing.assert_equal(
        sim_ad.where(prsim > 20.1),
        prsim.where(prsim > 20.5).transpose("lat", "time"),
    )

    # Assert that Pth and dP0 are approx the good values
    np.testing.assert_allclose(pth, 20, rtol=0.05)
    np.testing.assert_allclose(dP0, 0.5, atol=0.14)
    assert sim_ad.units == "mm d-1"
    assert sim_ad.attrs["references"].startswith("Themeßl")
    assert pth.units == "mm d-1"
def replace_nans_with_zeroes(ds, verbose=True):
    '''
    Replaces NaN values with zeroes for each variable within an
    xarray Dataset.

    Args:
    ----
    ds : xarray Dataset
        The input dataset, containing one or more data variables.

    Keyword Args (optional):
    ------------------------
    verbose : boolean
        Set this switch to print out the variable name, as well as
        the min and max of the variable. This will illustrate the
        replacement of NaNs with zeroes.
    '''

    # Keep all netCDF attributes
    with xr.set_options(keep_attrs=True):

        # Loop over all variables in the Dataset
        for v in ds.data_vars.keys():

            # OPTIONAL STEP:
            # Xarray will try to convert missing values to NaN's,
            # so you may need to replace these with zeroes.
            #
            # If your netCDF files represent e.g. emissions,
            # or other physical quantities, you may want to
            # replace these with zeros, so that NaNs won't
            # get read into atmospheric models, etc.
            #
            # NOTE: .where() keeps values where the condition is True
            # and substitutes `other` where it is False, so we keep the
            # non-NaN values and assign the result back to the Dataset.
            ds[v] = ds[v].where(~np.isnan(ds[v].values), other=0.0)

            # OPTIONAL: Print min & max for each variable
            # Comment out if you wish
            if verbose:
                print('{} : {} {}'.format(
                    v, np.min(ds[v].values), np.max(ds[v].values)))

    # Return the modified Dataset
    return ds
def multiply_area(dset, area=None):
    """
    DESCRIPTION:
    ===========
        Multiply gridded data by its area. If the dataset does not contain
        an area variable, the area dataarray needs to be provided.
    """
    if area is None:
        try:
            area = dset.area.values
        except AttributeError:
            print('dataset does not contain a data variable called area')
    with xr.set_options(keep_attrs=True):
        area_multiplied = dset[dset.varName] * area
    return area_multiplied
def add_tendency(state: Any, tendency: State, dt: float) -> State:
    """Given state and tendency prediction, return updated state.

    Returned state only includes variables updated by ML model."""
    with xr.set_options(keep_attrs=True):
        updated = {}
        for name_ in tendency:
            name = str(name_)
            try:
                state_name = str(TENDENCY_TO_STATE_NAME[name])
            except KeyError:
                raise KeyError(
                    f"Tendency variable '{name}' does not have an entry mapping it "
                    "to a corresponding state variable to add to. "
                    "Existing tendencies with mappings to state are "
                    f"{list(TENDENCY_TO_STATE_NAME.keys())}"
                )
            updated[state_name] = state[state_name] + tendency[name] * dt
    return updated  # type: ignore
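# Hypothetical usage sketch for add_tendency. The mapping below is an
# illustrative stand-in for the module-level TENDENCY_TO_STATE_NAME the function
# reads; the variable names and the 900 s timestep are assumptions, not the
# project's actual configuration.
import xarray as xr

TENDENCY_TO_STATE_NAME = {"dQ1": "air_temperature"}
state = {"air_temperature": xr.DataArray([273.0], dims="z", attrs={"units": "K"})}
tendency = {"dQ1": xr.DataArray([0.001], dims="z")}

updated = add_tendency(state, tendency, dt=900.0)  # 15-minute step
assert set(updated) == {"air_temperature"}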
def test_repr_HC(
    hind_ds_initialized_1d,
    hist_ds_uninitialized_1d,
    observations_ds_1d,
    display_style,
):
    """Test html repr."""
    with xr.set_options(display_style=display_style):
        he = HindcastEnsemble(hind_ds_initialized_1d)
        display(he)
        he = he.add_uninitialized(hist_ds_uninitialized_1d)
        display(he)
        he = he.add_observations(observations_ds_1d)
        display(he)
        # no uninit
        he = HindcastEnsemble(hind_ds_initialized_1d)
        he = he.add_observations(observations_ds_1d)
        display(he)
def zonal_bias_3d(diag_arg: DiagArg):
    logger.info("Preparing zonal mean bias (3d)")
    prognostic, verification, grid = (
        diag_arg.prediction,
        diag_arg.verification,
        diag_arg.grid,
    )
    zonal_means = xr.Dataset()
    common_vars = list(set(prognostic.data_vars).intersection(verification.data_vars))
    for var in common_vars:
        logger.info(f"Computing zonal+time mean biases (3d) for {var}")
        with xr.set_options(keep_attrs=True):
            zm_bias = zonal_mean(
                bias(verification[[var]], prognostic[[var]]), grid.lat
            )
            zm_bias_time_mean = time_mean(zm_bias)[var].load()
            zonal_means[var] = zm_bias_time_mean
    return zonal_means
def zonal_mean_bias_hovmoller(diag_arg: DiagArg):
    logger.info("Preparing zonal mean biases (2d)")
    prognostic, verification, grid = (
        diag_arg.prediction,
        diag_arg.verification,
        diag_arg.grid,
    )
    common_vars = list(set(prognostic.data_vars).intersection(verification.data_vars))
    zonal_means = xr.Dataset()
    for var in common_vars:
        logger.info(f"Computing zonal mean biases (2d) over time for {var}")
        with xr.set_options(keep_attrs=True):
            zonal_means[var] = zonal_mean(
                bias(verification[[var]], prognostic[[var]]), grid.lat
            )[var].load()
    return zonal_means
def test_repr_of_dataarray(dataarray) -> None:
    formatted = fh.array_repr(dataarray)
    assert "dim_0" in formatted
    # has an expanded data section
    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
    # coords and attrs don't have any items, so they'll be disabled and collapsed
    assert (
        formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2
    )

    with xr.set_options(display_expand_data=False):
        formatted = fh.array_repr(dataarray)
        assert "dim_0" in formatted
        # data section is collapsed
        assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 0
        # coords and attrs don't have any items, so they'll be disabled and collapsed
        assert (
            formatted.count("class='xr-section-summary-in' type='checkbox' disabled >")
            == 2
        )
def test_amount2rate(pr_series):
    pr = pr_series(np.ones(365 + 366 + 365), start="2019-01-01")
    am = rate2amount(pr)

    np.testing.assert_allclose(amount2rate(am), pr)

    with xr.set_options(keep_attrs=True):
        am_ms = am.resample(time="MS").sum()
        am_m = am.resample(time="M").sum()

        pr_ms = amount2rate(am_ms)
        np.testing.assert_allclose(pr_ms, 1)
        pr_m = amount2rate(am_m)
        np.testing.assert_allclose(pr_m, 1)

        am_ys = am.resample(time="YS").sum()
        pr_ys = amount2rate(am_ys)
        np.testing.assert_allclose(pr_ys, 1)
def remove_bias(fcst, bias, method, init_dim="init_date"):
    """Remove model bias.

    Parameters
    ----------
    fcst : xarray DataArray
        Forecast array with initial date and lead time dimensions
    bias : xarray DataArray
        Bias array
    method : {'additive', 'multiplicative'}
        Bias removal method
    init_dim : str, default 'init_date'
        Name of the initial date dimension in fcst

    Returns
    -------
    fcst_bc : xarray DataArray
        Bias corrected forecast array

    Raises
    ------
    ValueError
        For invalid method
    """
    if method == "additive":
        op = operator.sub
    elif method == "multiplicative":
        op = operator.truediv
    else:
        raise ValueError(f"Unrecognised bias removal method {method}")

    with xr.set_options(keep_attrs=True):
        fcst_bc = op(fcst.groupby(f"{init_dim}.month"), bias).drop("month")

    fcst_bc.attrs["bias_correction_method"] = bias.attrs["bias_correction_method"]
    try:
        fcst_bc.attrs["bias_correction_period"] = bias.attrs["bias_correction_period"]
    except KeyError:
        pass

    return fcst_bc
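# Hedged usage sketch for remove_bias with toy monthly data; the
# 'bias_correction_method' attribute is set here only because the function
# expects it on `bias`, and the values are illustrative.
import pandas as pd
import xarray as xr

init = pd.date_range("2000-01-01", periods=3, freq="MS")
fcst = xr.DataArray([1.0, 2.0, 3.0], coords={"init_date": init}, dims="init_date")
bias = xr.DataArray(
    [0.5, 0.5, 0.5],
    coords={"month": [1, 2, 3]},
    dims="month",
    attrs={"bias_correction_method": "additive"},
)

fcst_bc = remove_bias(fcst, bias, method="additive")
assert fcst_bc.attrs["bias_correction_method"] == "additive"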
def test_dump_ds(self):
    self.maxDiff = None
    with xr.set_options(display_width=80):
        result = self.invoke_cli(["dump", TEST_NC_FILE])

    # xarray v0.16.0 changed line truncation behaviour (see
    # https://github.com/pydata/xarray/issues/3759 ), so this test checks
    # the output against two expected values to cover both pre- and
    # post-change behaviour.
    output_template = (
        "<xarray.Dataset>\n"
        "Dimensions: (bnds: 2, lat: 180, lon: 360, time: 5)\n"
        "Coordinates:\n"
        " * lon (lon) float64 -179.5 -178.5 -177.5 ... 177.5 178.5 179.5\n"
        " * lat (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5\n"
        " * time (time) datetime64[ns] 2010-01-01T12:00:00 ... 2010-01-05T1%s\n"
        " lon_bnds (lon, bnds) float64 ...\n"
        " lat_bnds (lat, bnds) float64 ...\n"
        " time_bnds (time, bnds) datetime64[ns] ...\n"
        "Dimensions without coordinates: bnds\n"
        "Data variables:\n"
        " precipitation (time, lat, lon) float64 ...\n"
        " temperature (time, lat, lon) float64 ...\n"
        " soil_moisture (time, lat, lon) float64 ...\n"
        "Attributes:\n"
        " Conventions: CF-1.7\n"
        " title: Test Cube\n"
        " time_coverage_start: 2010-01-01T00:00:00\n"
        " time_coverage_end: 2010-01-06T00:00:00\n"
        " geospatial_lon_min: -180.0\n"
        " geospatial_lon_max: 180.0\n"
        " geospatial_lon_units: degrees_east\n"
        " geospatial_lat_min: -90.0\n"
        " geospatial_lat_max: 90.0\n"
        " geospatial_lat_units: degrees_north\n"
    )
    possible_outputs = [
        output_template % '2:00:00',  # for xarray v0.15.1 and below
        output_template % '...',      # for xarray v0.16.0 and above
    ]
    self.assertIn(result.output, possible_outputs)
    self.assertEqual(0, result.exit_code)
def temperature_seasonality(tas: xarray.DataArray) -> xarray.DataArray:
    r"""ANUCLIM temperature seasonality (coefficient of variation).

    The annual temperature coefficient of variation expressed in percent.
    Calculated as the standard deviation of temperature values for a given year
    expressed as a percentage of the mean of those temperatures.

    Parameters
    ----------
    tas : xarray.DataArray
        Mean temperature [℃] or [K] at daily, weekly, or monthly frequency.

    Returns
    -------
    xarray.DataArray
        The Coefficient of Variation of mean temperature values expressed in percent.

    Examples
    --------
    The following would compute for each grid cell of file `tas.day.nc` the annual
    temperature seasonality:

    >>> import xclim.indices as xci
    >>> t = xr.open_dataset(path_to_tas_file).tas
    >>> tday_seasonality = xci.temperature_seasonality(t)
    >>> t_weekly = xci.tg_mean(t, freq='7D')
    >>> tweek_seasonality = xci.temperature_seasonality(t_weekly)

    Notes
    -----
    For this calculation, the mean in degrees Kelvin is used. This avoids the
    possibility of having to divide by zero, but it does mean that the values
    are usually quite small.

    According to the ANUCLIM user-guide
    https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6), input values
    should be at a weekly (or monthly) frequency. However, the xclim.indices
    implementation here will calculate the result with input data with daily
    frequency as well. As such weekly or monthly input values, if desired,
    should be calculated prior to calling the function.
    """
    tas = convert_units_to(tas, "K")
    with xarray.set_options(keep_attrs=True):
        seas = 100 * _anuclim_coeff_var(tas)

    seas.attrs["units"] = "%"
    return seas
def _multiplicative_std_correction(hind, spread, dim, obs=None):
    """Quick removal of std bias over all initializations without cross validation.

    Args:
        hind (xr.Dataset): hindcast.
        spread (xr.Dataset): model spread.
        dim (str): Time dimension name in bias.
        obs (xr.Dataset): observations

    Returns:
        xr.Dataset: bias removed hind
    """
    seasonality = OPTIONS["seasonality"]
    if seasonality == "weekofyear":
        # convert to datetime for weekofyear operations
        hind = convert_cftime_to_datetime_coords(hind, "init")
        spread = convert_cftime_to_datetime_coords(spread, "init")
        obs = convert_cftime_to_datetime_coords(obs, "time")

    init_groupby = f"init.{seasonality}"
    obs_groupby = f"time.{seasonality}"

    with xr.set_options(keep_attrs=True):
        model_mean_spread = spread.groupby(init_groupby).mean()
        model_member_mean = hind.mean("member").groupby(init_groupby).mean()
        # assume that no trend here
        obs_spread = obs.groupby(obs_groupby).std()

        # z distr
        init_z = (
            hind.groupby(init_groupby) - model_member_mean
        ).groupby(init_groupby) / model_mean_spread

        # scale with obs_spread and model mean
        init_std_corrected = (
            init_z.groupby(init_groupby) * obs_spread
        ).groupby(init_groupby) + model_member_mean

    init_std_corrected.attrs = hind.attrs

    # convert back to CFTimeIndex if needed
    if isinstance(init_std_corrected.init.to_index(), pd.DatetimeIndex):
        init_std_corrected = convert_time_index(init_std_corrected, "init", "hindcast")
    return init_std_corrected
def test_repr_of_dataset(dataset) -> None:
    formatted = fh.dataset_repr(dataset)
    # coords, attrs, and data_vars are expanded
    assert (
        formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3
    )
    # dtype "<U4" and the "<IA>" location label appear HTML-escaped in the repr
    assert "&lt;U4" in formatted or "&gt;U4" in formatted
    assert "&lt;IA&gt;" in formatted

    with xr.set_options(
        display_expand_coords=False,
        display_expand_data_vars=False,
        display_expand_attrs=False,
    ):
        formatted = fh.dataset_repr(dataset)
        # coords, attrs, and data_vars are collapsed
        assert (
            formatted.count("class='xr-section-summary-in' type='checkbox' checked>")
            == 0
        )
        assert "&lt;U4" in formatted or "&gt;U4" in formatted
        assert "&lt;IA&gt;" in formatted
def test_array_repr(self):
    ds = xr.Dataset(coords={"foo": [1, 2, 3], "bar": [1, 2, 3]})
    ds[(1, 2)] = xr.DataArray([0], dims="test")
    actual = formatting.array_repr(ds[(1, 2)])
    expected = dedent(
        """\
        <xarray.DataArray (1, 2) (test: 1)>
        array([0])
        Dimensions without coordinates: test"""
    )
    assert actual == expected

    with xr.set_options(display_expand_data=False):
        actual = formatting.array_repr(ds[(1, 2)])
        expected = dedent(
            """\
            <xarray.DataArray (1, 2) (test: 1)>
            0
            Dimensions without coordinates: test"""
        )
        assert actual == expected
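# Minimal runnable sketch of the option exercised by the repr tests above:
# display_expand_data switches the text repr between a full array block and a
# collapsed one-line preview of the values.
import xarray as xr

da = xr.DataArray([0])
print(repr(da))                      # data shown as "array([0])"
with xr.set_options(display_expand_data=False):
    print(repr(da))                  # data collapsed to a single line: "0"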
def load_dataset(filelist):
    # Load the dataset and fix the time coordinate
    # Input: list of file names *.nc to load
    with xr.set_options(enable_cftimeindex=True):
        ds = xr.open_mfdataset(filelist, autoclose=True, decode_times=False)

    time0 = ds['time']
    time2 = cftime.num2date(ds['time'][:], units=ds['time'].units,
                            calendar=ds['time'].calendar,
                            only_use_cftime_datetimes=True)
    ds['time'].values = time2
    ds = convert_units(ds)
    return ds
def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
    time_bounds = np.array([[0, 2], [2, 4], [4, 6]])
    nv = np.array([0, 1])
    time = np.array([1, 3, 5])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 0002-01-02 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds[TIME_STR].attrs['calendar'] = calendar
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    with warnings.catch_warnings(record=True):
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = date_type(2, 1, 2)
    end_date = date_type(2, 1, 8)
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = date_type(2, 1, 1)
    end_date_bad = date_type(2, 1, 9)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
def _prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval used
       to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset
    """
    ds = times.ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = times.ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = times.average_time_bounds(ds)
    else:
        logging.warning("dt array not found. Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = times.add_uniform_time_weights(ds)
    # Suppress enable_cftimeindex is a no-op warning; we'll keep setting it
    # for now to maintain backwards compatibility for older xarray versions.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds, decode_times=True,
                              decode_coords=False, mask_and_scale=True)
    return ds
def test_invalid_option_raises():
    with pytest.raises(ValueError):
        xarray.set_options(not_a_valid_options=True)
def test_arithmetic_join():
    with pytest.raises(ValueError):
        xarray.set_options(arithmetic_join='invalid')
    with xarray.set_options(arithmetic_join='exact'):
        assert OPTIONS['arithmetic_join'] == 'exact'
def test_enable_cftimeindex():
    with pytest.raises(ValueError):
        xarray.set_options(enable_cftimeindex=None)
    with pytest.warns(FutureWarning, match='no-op'):
        with xarray.set_options(enable_cftimeindex=True):
            assert OPTIONS['enable_cftimeindex']