def test_calendars(self):
    # Two years of daily ones, in both the standard and the noleap calendar.
    time_std = date_range("1991-07-01", "1993-06-30", freq="D", calendar="standard")
    time_365 = date_range("1991-07-01", "1993-06-30", freq="D", calendar="noleap")
    data_std = xr.DataArray(
        np.ones((time_std.size, 4)),
        dims=("time", "lon"),
        coords={"time": time_std, "lon": [-72, -71, -70, -69]},
    )

    # Aggregation bounds given as day-of-year values (NaN = missing bound).
    start_v = [[200, 200, np.nan, np.nan], [200, 200, 60, 60]]
    end_v = [[200, np.nan, 60, np.nan], [360, 60, 360, 80]]
    start_std = xr.DataArray(
        start_v,
        dims=("time", "lon"),
        coords={"time": [time_std[0], time_std[366]], "lon": data_std.lon},
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )
    end_std = xr.DataArray(
        end_v,
        dims=("time", "lon"),
        coords={"time": [time_std[0], time_std[366]], "lon": data_std.lon},
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )
    # Same end bounds, but carried on a noleap time axis.
    end_noleap = xr.DataArray(
        end_v,
        dims=("time", "lon"),
        coords={"time": [time_365[0], time_365[365]], "lon": data_std.lon},
        attrs={"calendar": "noleap", "is_dayofyear": 1},
    )

    out = generic.aggregate_between_dates(
        data_std, start_std, end_std, op="sum", freq="AS-JUL"
    )

    # Expected: day count between the bounds; NaN when a bound is missing
    # or the window is inverted (start after end).
    s = doy_to_days_since(start_std)
    e = doy_to_days_since(end_std)
    expected = e - s
    expected = xr.where(((s > e) | (s.isnull()) | (e.isnull())), np.nan, expected)

    np.testing.assert_allclose(out, expected)

    # Check calendar conversion: a noleap end bound must yield the same result.
    out_noleap = generic.aggregate_between_dates(
        data_std, start_std, end_noleap, op="sum", freq="AS-JUL"
    )

    np.testing.assert_allclose(out, out_noleap)
def make_ensemble(
    files: List[Path],
    percentiles: List[int],
    average_dims: Optional[Tuple[str, ...]] = None,
):
    """Compute ensemble percentiles over the given netCDF files.

    Parameters
    ----------
    files : List[Path]
        Paths of the members to combine into an ensemble.
    percentiles : List[int]
        Percentile values to compute (passed to `ensembles.ensemble_percentiles`).
    average_dims : Tuple[str, ...], optional
        If given, dimensions over which the ensemble is averaged before
        computing the percentiles.

    Returns
    -------
    The dataset of ensemble percentiles, loaded in memory.
    """
    ensemble = ensembles.create_ensemble(files)

    # Make sure we have data starting in 1950.
    ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950))

    # If data is in day of year, percentiles won't make sense.
    # Convert to "days since" (base will be the time coordinate).
    for v in ensemble.data_vars:
        if ensemble[v].attrs.get("is_dayofyear", 0) == 1:
            ensemble[v] = doy_to_days_since(ensemble[v])

    if average_dims is not None:
        ensemble = ensemble.mean(dim=average_dims)

    ensemble_percentiles = ensembles.ensemble_percentiles(ensemble, values=percentiles)

    # Doy data converted previously is converted back.
    for v in ensemble_percentiles.data_vars:
        if ensemble_percentiles[v].attrs.get("units", "").startswith("days after"):
            ensemble_percentiles[v] = days_since_to_doy(ensemble_percentiles[v])

    # Depending on the datasets, I've found that writing the netcdf could hang
    # if the dataset was not loaded explicitly previously... Not sure why.
    # The datasets should be pretty small when computing the ensembles, so this is
    # a best effort at working around what looks like a bug in either xclim or xarray.
    # The xarray documentation mentions: 'this method can be necessary when working
    # with many file objects on disk.'
    ensemble_percentiles.load()

    return ensemble_percentiles
def test_time_length(self):
    # Daily ones over 1991-1993. The start-bound axis is shifted one year
    # earlier so the last data year has no matching start date.
    time_data = date_range(
        "1991-01-01", "1993-12-31", freq="D", calendar="standard"
    )
    time_start = date_range(
        "1990-01-01", "1992-12-31", freq="D", calendar="standard"
    )
    time_end = date_range("1991-01-01", "1993-12-31", freq="D", calendar="standard")

    data = xr.DataArray(
        np.ones((time_data.size, 4)),
        dims=("time", "lon"),
        coords={"time": time_data, "lon": [-72, -71, -70, -69]},
    )

    # Aggregation bounds as day-of-year values (NaN = missing bound).
    start_v = [[200, 200, np.nan, np.nan], [200, 200, 60, 60], [150, 100, 40, 10]]
    end_v = [[200, np.nan, 60, np.nan], [360, 60, 360, 80], [200, 200, 60, 50]]
    start = xr.DataArray(
        start_v,
        dims=("time", "lon"),
        coords={
            "time": [time_start[0], time_start[365], time_start[730]],
            "lon": data.lon,
        },
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )
    end = xr.DataArray(
        end_v,
        dims=("time", "lon"),
        coords={
            "time": [time_end[0], time_end[365], time_end[731]],
            "lon": data.lon,
        },
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )

    out = generic.aggregate_between_dates(data, start, end, op="sum", freq="YS")

    # Expected: day count between bounds, with the inverted window masked;
    # the last period has no start date at all and must be all-NaN.
    s = doy_to_days_since(start)
    e = doy_to_days_since(end)
    expected = e - s
    expected[1, 1] = np.nan

    np.testing.assert_allclose(out[0:2], expected)
    np.testing.assert_allclose(out[2], np.array([np.nan, np.nan, np.nan, np.nan]))
def day_lengths(
    dates: xr.DataArray,
    lat: xr.DataArray,
    obliquity: float = -0.4091,
    summer_solstice: DayOfYearStr = "06-21",
    start_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    end_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    freq: str = "YS",
) -> xr.DataArray:
    r"""Day-lengths according to latitude, obliquity, and day of year.

    Parameters
    ----------
    dates: xr.DataArray
        Array carrying the time coordinate for which day-lengths are computed.
    lat: xarray.DataArray
        Latitude coordinate.
    obliquity: float
        Obliquity of the elliptic (radians). Default: -0.4091.
    summer_solstice: DayOfYearStr
        Date of summer solstice in northern hemisphere. Used for approximating solar julian dates.
    start_date: xarray.DataArray or DayOfYearStr, optional
        Start date to consider for calculating mean day lengths. Default: None.
    end_date: xarray.DataArray or DayOfYearStr, optional
        End date to consider for calculating mean day lengths. Default: None.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        If start and end date provided, returns total sum of daylight-hour between dates at provided frequency.
        If no start and end date provided, returns day-length in hours per individual day.

    Notes
    -----
    Daylight-hours are dependent on latitude, :math:`lat`, the Julian day (solar day) from the summer solstice in the
    Northern hemisphere, :math:`Jday`, and the axial tilt :math:`Axis`, therefore day-length at any latitude for a given
    date on Earth, :math:`dayLength_{lat_{Jday}}`, for a given year in days, :math:`Year`, can be approximated as
    follows:

    .. math::
        dayLength_{lat_{Jday}} = f({lat}, {Jday}) = \frac{\arccos(1-m_{lat_{Jday}})}{\pi} * 24

    Where:

    .. math::
        m_{lat_{Jday}} = f({lat}, {Jday}) = 1 - \tan({Lat}) * \tan \left({Axis}*\cos\left[\frac{2*\pi*{Jday}}{||{Year}||} \right] \right)

    The total sum of daylight hours for a given period between two days (:math:`{Jday} = 0` -> :math:`N`) within a solar
    year then is:

    .. math::
        \sum({SeasonDayLength_{lat}}) = \sum_{Jday=1}^{N} dayLength_{lat_{Jday}}

    References
    ----------
    Modified day-length equations for Huglin heliothermal index published in Hall, A., & Jones, G. V. (2010). Spatial
    analysis of climate in winegrape-growing regions in Australia. Australian Journal of Grape and Wine Research, 16(3),
    389‑404. https://doi.org/10.1111/j.1755-0238.2010.00100.x

    Examples available from Glarner, 2006 (http://www.gandraxa.com/length_of_day.xml).
    """
    cal = get_calendar(dates)

    # Length of each calendar year, aligned on the time coordinate.
    year_length = dates.time.copy(
        data=[days_in_year(x, calendar=cal) for x in dates.time.dt.year]
    )

    # Solar julian day, counted from the (northern) summer solstice.
    julian_date_from_solstice = dates.time.copy(
        data=doy_to_days_since(
            dates.time.dt.dayofyear, start=summer_solstice, calendar=cal
        )
    )

    # m term of Glarner's approximation; day length follows from its arccos.
    m_lat_dayofyear = 1 - np.tan(np.radians(lat)) * np.tan(
        obliquity * (np.cos((2 * np.pi * julian_date_from_solstice) / year_length))
    )
    day_length_hours = (np.arccos(1 - m_lat_dayofyear) / np.pi) * 24

    if start_date and end_date:
        # Aggregate daylight hours between the two bounds, per period.
        return aggregate_between_dates(
            day_length_hours, start=start_date, end=end_date, op="sum", freq=freq
        )
    return day_length_hours
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
        Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
        Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
        End (as day-of-year) dates for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
        Operator.
    freq : str, optional
        Resampling frequency. If None, inferred from the time coordinate of
        the bounds.

    Returns
    -------
    xarray.DataArray, [dimensionless]
        Aggregated data between the start and end dates. If the end date is before the start date, returns np.nan.
        If there is no start and/or end date, returns np.nan.
    """

    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if isinstance(_bound, str):
            b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
            if not len(b_i):
                return None
            return (_group.time.isel(time=b_i[0]) - _group.time.isel(time=0)).dt.days
        if _base_time in _bound.time:
            return _bound.sel(time=_base_time)
        return None

    if freq is None:
        # A DayOfYearStr bound has no `.time`, hence the AttributeError guard.
        frequencies = []
        for bound in (start, end):
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually."
            )

        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")

    # Align each array bound on the data's calendar, then express it as
    # "days since" the start of its period.
    if not isinstance(start, str):
        start = convert_calendar(start, cal)
        start.attrs["calendar"] = cal
        start = doy_to_days_since(start)
    if not isinstance(end, str):
        end = convert_calendar(end, cal)
        end.attrs["calendar"] = cal
        end = doy_to_days_since(end)

    out = []
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        # convert bounds for this group
        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        if start_d is not None and end_d is not None:
            # Mask days outside the aggregation window, then reduce.
            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            # Invalid windows (inverted or with a missing bound) become NaN.
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())), np.nan, res
            )
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])
            out.append(res)
        else:
            # Get an array with the good shape, put nans and add the new time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
            out.append(res)

    return xr.concat(out, dim="time")
def test_frequency(self):
    # Daily ones spanning roughly a year and a half, two longitudes.
    time_data = date_range(
        "1991-01-01", "1992-05-31", freq="D", calendar="standard"
    )
    data = xr.DataArray(
        np.ones((time_data.size, 2)),
        dims=("time", "lon"),
        coords={"time": time_data, "lon": [-70, -69]},
    )

    # Quarterly start/end bounds (day-of-year), plus a monthly end bound
    # whose frequency deliberately disagrees with the start bound.
    start_v = [[70, 100], [200, 200], [270, 300], [35, 35], [80, 80]]
    end_v = [[130, 70], [200, np.nan], [330, 270], [35, np.nan], [150, 150]]
    end_m_v = [[20, 20], [40, 40], [80, 80], [100, 100], [130, 130]]

    start = xr.DataArray(
        start_v,
        dims=("time", "lon"),
        coords={
            "time": [
                time_data[59],
                time_data[151],
                time_data[243],
                time_data[334],
                time_data[425],
            ],
            "lon": data.lon,
        },
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )
    end = xr.DataArray(
        end_v,
        dims=("time", "lon"),
        coords={
            "time": [
                time_data[59],
                time_data[151],
                time_data[243],
                time_data[334],
                time_data[425],
            ],
            "lon": data.lon,
        },
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )
    end_m = xr.DataArray(
        end_m_v,
        dims=("time", "lon"),
        coords={
            "time": [
                time_data[0],
                time_data[31],
                time_data[59],
                time_data[90],
                time_data[120],
            ],
            "lon": data.lon,
        },
        attrs={"calendar": "standard", "is_dayofyear": 1},
    )

    out = generic.aggregate_between_dates(data, start, end, op="sum", freq="QS-DEC")

    # Expected: day counts between bounds, NaN for inverted windows. The
    # first quarter has no bounds at all and must be all-NaN.
    s = doy_to_days_since(start)
    e = doy_to_days_since(end)
    expected = e - s
    expected = xr.where(expected < 0, np.nan, expected)

    np.testing.assert_allclose(out[0], np.array([np.nan, np.nan]))
    np.testing.assert_allclose(out[1:6], expected)

    # Inconsistent bound frequencies without an explicit `freq` must raise.
    with pytest.raises(ValueError):
        generic.aggregate_between_dates(data, start, end_m)
def test_doy_to_days_since():
    # --- Simple case: yearly freq anchored in July. ---
    time = date_range("2020-07-01", "2022-07-01", freq="AS-JUL")
    da = xr.DataArray(
        [190, 360, 3],
        dims=("time",),
        coords={"time": time},
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da)
    np.testing.assert_array_equal(out, [7, 178, 186])

    assert out.attrs["units"] == "days after 07-01"
    assert "is_dayofyear" not in out.attrs

    # Round-trip back to day-of-year must be lossless.
    da2 = days_since_to_doy(out)
    xr.testing.assert_identical(da, da2)

    # An explicit start matching the anchor gives the same result.
    out = doy_to_days_since(da, start="07-01")
    np.testing.assert_array_equal(out, [7, 178, 186])

    # --- Other calendar. ---
    out = doy_to_days_since(da, calendar="noleap")
    assert out.attrs["calendar"] == "noleap"
    np.testing.assert_array_equal(out, [8, 178, 186])

    da2 = days_since_to_doy(out)  # calendar read from attribute
    da2.attrs.pop("calendar")  # drop for identicality
    da.attrs.pop("calendar")  # drop for identicality
    xr.testing.assert_identical(da, da2)

    # --- With an explicit start. ---
    time = date_range("2020-12-31", "2022-12-31", freq="Y")
    da = xr.DataArray(
        [190, 360, 3],
        dims=("time",),
        coords={"time": time},
        name="da",
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da, start="01-02")
    np.testing.assert_array_equal(out, [188, 358, 1])

    da2 = days_since_to_doy(out)  # start read from attribute
    assert da2.name == da.name
    xr.testing.assert_identical(da, da2)

    # --- Finer (monthly) frequency: base is the time coordinate itself. ---
    time = date_range("2020-01-01", "2020-03-01", freq="MS")
    da = xr.DataArray(
        [15, 33, 66],
        dims=("time",),
        coords={"time": time},
        name="da",
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da)
    assert out.attrs["units"] == "days after time coordinate"
    np.testing.assert_array_equal(out, [14, 1, 5])

    da2 = days_since_to_doy(out)  # start read from attribute
    xr.testing.assert_identical(da, da2)