def train(
    self,
    ref: DataArray,
    hist: DataArray,
):
    """Train the adjustment object.

    Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    ref : DataArray
        Training target, usually a reference time series drawn from observations.
    hist : DataArray
        Training data, usually a model output whose biases are to be adjusted.

    Warns
    -----
    A warning is emitted when the object was already trained, and when ``ref``
    and ``hist`` use different calendars under a "dayofyear" grouping.
    """
    if self._trained:
        warn("train() was already called, overwriting old results.")

    if hasattr(self, "group"):
        # Right now there is no other way of getting the main adjustment dimension
        _raise_on_multiple_chunk(ref, self.group.dim)
        _raise_on_multiple_chunk(hist, self.group.dim)

        if self.group.prop == "dayofyear" and get_calendar(ref) != get_calendar(hist):
            warn(
                ("Input ref and hist are defined on different calendars, "
                 "this is not recommended when using 'dayofyear' grouping "
                 "and could give strange results. See `xclim.core.calendar` "
                 "for tools to convert your data to a common calendar."),
                stacklevel=4,
            )

    # Remember the calendar of the training data.
    self["hist_calendar"] = get_calendar(hist)
    self._train(ref, hist)
    # Mark the object as trained; without this the flag tested at the top of
    # this method would never become true.
    self._trained = True
def train(cls, ref: DataArray, hist: DataArray, **kwargs):
    """Build a trained adjustment object from reference and historical data.

    Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    ref : DataArray
        Training target, usually a reference time series drawn from observations.
    hist : DataArray
        Training data, usually a model output whose biases are to be adjusted.
    **kwargs
        Forwarded to ``cls._train`` after being processed by ``parse_group``.
        ``skip_input_checks=True`` bypasses unit harmonization and input checks.

    Returns
    -------
    An instance of ``cls`` flagged as trained, holding the training dataset.
    """
    kwargs = parse_group(cls._train, kwargs)
    run_checks = not kwargs.pop("skip_input_checks", False)

    # Units stay empty when the checks (and thus the harmonization) are skipped.
    train_units = ""
    if run_checks:
        (ref, hist), train_units = cls._harmonize_units(ref, hist)

        if "group" in kwargs:
            cls._check_inputs(ref, hist, group=kwargs["group"])

        hist = convert_units_to(hist, ref)

    ds, params = cls._train(ref, hist, **kwargs)

    init_args = dict(
        _trained=True,
        hist_calendar=get_calendar(hist),
        train_units=train_units,
    )
    trained = cls(**init_args, **params)
    trained.set_dataset(ds)
    return trained
def get_coordinate(self, ds=None):
    """Return the coordinate as in the output of group.apply.

    Implemented for groupings with prop == "month", "season", "dayofyear"
    and "group".

    Parameters
    ----------
    ds : Dataset or DataArray, optional
        Only used when prop == "dayofyear": if given, the maximum day of year
        is inferred from the years and calendar found along ``self.dim``.
        Otherwise 365 days are assumed.

    Returns
    -------
    xr.DataArray
        One-dimensional coordinate named after the grouping property.

    Raises
    ------
    NotImplementedError
        For any other value of ``self.prop``.
    """
    if self.prop == "month":
        return xr.DataArray(np.arange(1, 13), dims=("month",), name="month")
    if self.prop == "season":
        return xr.DataArray(
            ["DJF", "MAM", "JJA", "SON"], dims=("season",), name="season"
        )
    if self.prop == "dayofyear":
        if ds is not None:
            cal = get_calendar(ds, dim=self.dim)
            # Longest year present in the data decides the coordinate length.
            mdoy = max(
                days_in_year(yr, cal) for yr in np.unique(ds[self.dim].dt.year)
            )
        else:
            mdoy = 365
        return xr.DataArray(
            np.arange(1, mdoy + 1), dims=("dayofyear",), name="dayofyear"
        )
    if self.prop == "group":
        return xr.DataArray([1], dims=("group",), name="group")
    # TODO: decide what should happen when there is no group (prop is None).
    raise NotImplementedError(
        f"get_coordinate is not implemented for prop={self.prop!r}."
    )
def adjust(self, sim: DataArray, **kwargs):
    """Return bias-adjusted data.

    Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    sim : DataArray
        Time series to be bias-adjusted, usually a model output.
    kwargs :
        Algorithm-specific keyword arguments, see class doc.

    Returns
    -------
    The output of ``self._adjust`` with an updated "xclim_history" attribute.

    Raises
    ------
    ValueError
        If the object has not been trained.
    """
    if not self._trained:
        raise ValueError("train() must be called before adjusting.")

    if hasattr(self, "group"):
        # Right now there is no other way of getting the main adjustment dimension
        _raise_on_multiple_chunk(sim, self.group.dim)

        if (self.group.prop == "dayofyear"
                and get_calendar(sim) != self.hist_calendar):
            # The message reads the same attribute as the condition above.
            warn(
                ("This adjustment was trained on a simulation with the "
                 f"{self.hist_calendar} calendar but the sim input uses "
                 f"{get_calendar(sim)}. This is not recommended with dayofyear "
                 "grouping and could give strange results."),
                stacklevel=4,
            )

    scen = self._adjust(sim, **kwargs)

    # Record the call, including its keyword arguments, in the history attribute.
    params = ", ".join([f"{k}={v!r}" for k, v in kwargs.items()])
    scen.attrs["xclim_history"] = update_history(
        f"Bias-adjusted with {self!s}.adjust(sim, {params})", sim)
    return scen
def test_convert_calendar(source, target, target_as_str, freq):
    """Check calendar, size and NaN count after converting 2004 data between calendars."""

    def _time_axis(calendar):
        # Full-year time coordinate for 2004 in the requested calendar.
        return xr.DataArray(
            date_range("2004-01-01", "2004-12-31", freq=freq, calendar=calendar),
            dims=("time", ),
            name="time",
        )

    src = _time_axis(source)
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time", ), coords={"time": src}
    )
    tgt = _time_axis(target)

    # The target is given either as a calendar name or as a time coordinate.
    if target_as_str:
        conv = convert_calendar(da_src, target)
    else:
        conv = convert_calendar(da_src, tgt)

    assert get_calendar(conv) == target

    if not target_as_str:
        assert conv.size == tgt.size
        assert conv.isnull().sum() == max(max_doy[target] - max_doy[source], 0)
    elif max_doy[source] < max_doy[target]:
        assert conv.size == src.size
def snd_max_doy(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray:
    """Maximum snow depth day of year.

    Day of year when surface snow reaches its peak value. If snow depth is 0
    over entire period, return NaN.

    Parameters
    ----------
    snd : xarray.DataArray
        Surface snow depth.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The day of year at which snow depth reaches its maximum value.
    """
    from xclim.core.missing import at_least_n_valid

    # Only strictly positive depths count as values for the validity check.
    positive_snd = snd.where(snd > 0)
    period_flag = at_least_n_valid(positive_snd, n=1, freq=freq)

    # Day of year of the maximum, per period. For an all-zero period this is
    # the first time step, hence the masking below.
    doy_of_max = generic.select_resample_op(snd, op=generic.doymax, freq=freq)
    doy_of_max.attrs.update(
        dict(units="", is_dayofyear=1, calendar=get_calendar(snd))
    )

    # NOTE(review): values are kept where the flag is False — presumably the
    # helper flags the periods lacking a positive value; confirm against
    # `at_least_n_valid`.
    return doy_of_max.where(~period_flag)
def test_convert_calendar_360_days(source, target, freq, align_on):
    """Check month-end days and output size for conversions involving 360_day."""
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time", ), coords={"time": src}
    )

    conv = convert_calendar(da_src, target, align_on=align_on)

    assert get_calendar(conv) == target

    # Last day present in each month of the converted series.
    if align_on == "date":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
        )
    elif target == "360_day":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
        )
    else:
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
        )

    expected_days = 360 if (source == "360_day" and align_on == "year") else 359
    # The previous form, `assert conv.size == N if freq == "D" else N * 4`,
    # parsed as `(conv.size == N) if freq == "D" else N * 4`, so only daily
    # data was ever checked. That is now explicit.
    if freq == "D":
        assert conv.size == expected_days
    # TODO(review): add a real size check for sub-daily frequencies; the
    # expected count depends on the number of steps per day.
def doymin(da: xr.DataArray) -> xr.DataArray:
    """Return the day of year of the minimum value."""
    # Position of the minimum along time, then the day of year at that position.
    min_index = da.argmin(dim="time")
    day_of_year = da.time.dt.dayofyear
    result = day_of_year.isel(time=min_index, drop=True)

    metadata = dict(units="", is_dayofyear=np.int32(1), calendar=get_calendar(da))
    result.attrs.update(metadata)
    return result
def test_convert_calendar_360_days_random():
    """Check conversions to and from 360_day with ``align_on="random"``."""
    da_std = xr.DataArray(
        np.linspace(0, 1, 366 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default"
            )
        },
    )
    da_360 = xr.DataArray(
        np.linspace(0, 1, 360 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day"
            )
        },
    )

    # default -> 360_day: two conversions of the same data must differ.
    conv = convert_calendar(da_std, "360_day", align_on="random")
    assert get_calendar(conv) == "360_day"
    assert conv.size == 720
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    # 360_day -> default
    conv = convert_calendar(da_360, "default", align_on="random")
    assert get_calendar(conv) == "default"
    assert conv.size == 720
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "default", align_on="random")
    assert (conv2 != conv).any()

    # 360_day -> noleap with NaN filling: keep only the filled steps and check
    # that their days of year fall one per 73-day block.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan)
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[::2]
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))
def unpack_moving_yearly_window(da: xr.DataArray, dim: str = "movingwin"):
    """Unpack a constructed moving window dataset to a normal timeseries, only keeping the central data.

    Unpack DataArrays created with :py:func:`construct_moving_yearly_window` and recreate a timeseries data.
    Only keeps the central non-overlapping years. The final timeseries will be (window - step) years shorter
    than the initial one.

    The window length and window step are inferred from the coordinates.

    Parameters
    ----------
    da: xr.DataArray
      As constructed by :py:func:`construct_moving_yearly_window`.
    dim : str
      The window dimension name as given to the construction function.
    """
    # Samples per year (the helper also performs checks on the time axis).
    per_year = _get_number_of_elements_by_year(da.time)

    # Window length in years. It may be shorter than the original moving
    # window; that does not matter here.
    window = da.time.size / per_year
    if window % 1 != 0:
        warnings.warn(
            f"Incomplete data received as number of years covered is not an integer ({window})"
        )

    # Step between consecutive windows, in years.
    year_length = max_doy[get_calendar(da)]
    spacing_in_years = da[dim].diff(dim).dt.days / year_length
    distinct_steps = np.unique(spacing_in_years)
    if len(distinct_steps) > 1:
        raise ValueError("The spacing between the windows is not equal.")
    step = int(distinct_steps[0])

    # Keep `step` years taken from the middle of each window.
    first_kept = int((window - step) // 2)
    keep = slice(first_kept * per_year, (first_kept + step) * per_year)
    da = da.isel(time=keep)

    origin = da[dim][0].values
    pieces = []
    for win_start in da[dim]:
        piece = da.sel({dim: win_start}).drop_vars(dim)
        # Shift the time axis by the offset of this window from the first one.
        piece["time"] = piece.time + (win_start.values - origin)
        pieces.append(piece)

    return xr.concat(pieces, "time")
def _check_inputs(cls, *inputs, group):
    """Validate the inputs of an adjustment.

    Raises an error if there are chunks along the main dimension.
    Also raises if cls._allow_diff_calendars is False and calendars differ,
    or if the inputs carry mismatched multivariate coordinates.
    """
    main_dim = group.dim
    for inda in inputs:
        if not uses_dask(inda):
            continue
        if len(inda.chunks[inda.get_axis_num(main_dim)]) > 1:
            raise ValueError(
                f"Multiple chunks along the main adjustment dimension {main_dim} is not supported."
            )

    # All calendars used by the inputs
    calendars = {get_calendar(inda, main_dim) for inda in inputs}
    if len(calendars) > 1 and not cls._allow_diff_calendars:
        raise ValueError(
            "Inputs are defined on different calendars,"
            f" this is not supported for {cls.__name__} adjustment."
        )

    # Check multivariate dimensions: collect every coordinate flagged with
    # the "is_variables" attribute.
    mvcrds = [
        crd
        for inda in inputs
        for crd in inda.coords.values()
        if crd.attrs.get("is_variables", False)
    ]
    if mvcrds:
        differing = any(not mvcrds[0].equals(mv) for mv in mvcrds[1:])
        if differing or len(mvcrds) != len(inputs):
            names = {mv.name for mv in mvcrds}
            raise ValueError(
                "Inputs have different multivariate coordinates "
                f"({names})."
            )

    if group.prop == "dayofyear":
        if "default" in calendars or "standard" in calendars:
            warn(
                "Strange results could be returned when using dayofyear grouping "
                "on data defined in the proleptic_gregorian calendar "
            )
def test_interp_calendar(source, target):
    """Check size, calendar and value range after interpolating onto another calendar."""

    def _daily_axis(calendar):
        # Daily time coordinate from 2004-01-01 to 2004-07-30.
        return xr.DataArray(
            date_range("2004-01-01", "2004-07-30", freq="D", calendar=calendar),
            dims=("time", ),
            name="time",
        )

    src = _daily_axis(source)
    tgt = _daily_axis(target)
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time", ), coords={"time": src}
    )

    conv = interp_calendar(da_src, tgt)

    assert conv.size == tgt.size
    assert get_calendar(conv) == target

    np.testing.assert_almost_equal(conv.max(), 1, 2)
    assert conv.min() == 0
def adjust(self, sim: DataArray, **kwargs):
    """Return bias-adjusted data.

    Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    sim : DataArray
        Time series to be bias-adjusted, usually a model output.
    **kwargs
        Forwarded to ``self._adjust``.

    Raises
    ------
    ValueError
        If the object has not been trained.
    """
    if not self.__trained:
        raise ValueError("train() must be called before adjusting.")

    # The calendar comparison only matters for day-of-year groupings.
    doy_grouping = hasattr(self, "group") and self.group.prop == "dayofyear"
    if doy_grouping and get_calendar(sim) != self._hist_calendar:
        message = (
            "This adjustment was trained on a simulation with the "
            f"{self._hist_calendar} calendar but the sim input uses "
            f"{get_calendar(sim)}. This is not recommended with dayofyear "
            "grouping and could give strange results."
        )
        warn(message, stacklevel=4)

    adjusted = self._adjust(sim, **kwargs)
    adjusted.attrs["bias_adjusted"] = True
    return adjusted
def _get_number_of_elements_by_year(time):
    """Get the number of elements in time in a year by inferring its sampling frequency.

    Only calendar with uniform year lengths are supported : 360_day, noleap, all_leap.

    Raises
    ------
    ValueError
        If the calendar is one of the rejected (non-uniform) ones, or if the
        inferred sampling does not divide a year into a whole number of elements.
    """
    cal = get_calendar(time)

    # Calendar check
    non_uniform_calendars = ("standard", "gregorian", "default", "proleptic_gregorian")
    if cal in non_uniform_calendars:
        raise ValueError(
            "For moving window computations, the data must have a uniform calendar (360_day, no_leap or all_leap)"
        )

    mult, freq, _, _ = parse_offset(xr.infer_freq(time))
    year_days = max_doy[cal]

    # Elements per year for one unit of each supported base frequency;
    # anything else counts as one element per year.
    per_unit = {"Q": 4, "M": 12, "D": year_days, "H": year_days * 24}
    per_year = per_unit.get(freq, 1) / mult

    if per_year % 1 != 0:
        raise ValueError(
            f"Sampling frequency of the data must be Q, M, D or H and evenly divide a year (got {mult}{freq})."
        )

    return int(per_year)
def test_ensure_cftime_array(inp, calout):
    """The array returned by ``ensure_cftime_array`` must report the expected calendar."""
    converted = ensure_cftime_array(inp)
    inferred = get_calendar(converted)
    assert inferred == calout
def test_get_calendar(file, cal, maxdoy):
    """Check the calendar inferred from a dataset file and its ``max_doy`` entry."""
    path = os.path.join(*file)
    with open_dataset(path) as ds:
        inferred = get_calendar(ds)
        assert cal == inferred
        assert max_doy[cal] == maxdoy
def xclim_convert_360day_calendar_interpolate(
    ds,
    target="noleap",
    align_on="random",
    interpolation="linear",
    return_indices=False,
    ignore_nans=True,
):
    """Convert a 360-day calendar dataset to `target`, optionally interpolating the inserted days.

    Parameters
    ----------
    ds : xr.Dataset
    target : str
        see xclim.core.calendar.convert_calendar
    align_on : str
        this determines which days in the calendar will have missing values or will be the product of
        interpolation, if there is. It could be every year the same calendar days, or the days could
        randomly change. see xclim.core.calendar.convert_calendar
    interpolation : None or str
        passed to xr.Dataset.interpolate_na if not None
    return_indices : bool
        on top of the converted dataset, return a boolean dataset that is True where the converted
        (not yet interpolated) data is missing, identifying the values that were inserted. This
        assumes there were no NaNs before conversion.
    ignore_nans : bool
        if False and there are any NaNs in `ds` variables, an assertion error will be raised.
        NaNs are ignored otherwise.

    Returns
    -------
    tuple(xr.Dataset, xr.Dataset) if return_indices is True, xr.Dataset otherwise.

    Raises
    ------
    ValueError
        If the calendar of `ds` is not "360_day".
    AssertionError
        If `ignore_nans` is False and a variable of `ds` contains NaNs.

    Notes
    -----
    The default values of `target`, `align_on` and `interpolation` mean that our default approach is
    equivalent to that of the LOCA calendar conversion [1] for conversion from 360 days calendars to
    noleap calendars. In that approach, 5 calendar days are added (noleap calendars always have 365
    days) to each year. But those calendar days are not necessarily those that will have their value
    be the product of interpolation. The days for which we interpolate are selected randomly every
    block of 72 days, so that they change every year.

    [1] http://loca.ucsd.edu/loca-calendar/
    """
    if get_calendar(ds) != "360_day":
        raise ValueError(
            "tried to use 360 day calendar conversion for a non-360-day calendar dataset"
        )

    if not ignore_nans:
        for var in ds:
            # Raised explicitly instead of using `assert`, which is stripped
            # when Python runs with -O.
            if ds[var].isnull().any():
                raise AssertionError(
                    "360 days calendar conversion with interpolation : there are nans !"
                )

    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    ds_converted = convert_calendar(
        ds, target=target, align_on=align_on, missing=np.nan
    )

    if interpolation:
        ds_out = ds_converted.interpolate_na("time", interpolation)
    else:
        ds_out = ds_converted

    if return_indices:
        # Mask of the missing values in the converted, not yet interpolated data.
        return ds_out, ds_converted.isnull()
    return ds_out
def standardize_gcm(ds, leapday_removal=True):
    """Clean up a GCM dataset: drop cruft, convert precipitation units, fix the calendar.

    360 calendar conversion requires that there are no chunks in
    the 'time' dimension of `ds`.

    Parameters
    ----------
    ds : xr.Dataset
    leapday_removal : bool, optional

    Returns
    -------
    xr.Dataset

    Raises
    ------
    ValueError
        If a "pr" variable is present and its units are not "kg m-2 s-1".
    """
    # Remove cruft coordinates, variables, dims.
    cruft_vars = ("height", "member_id", "time_bnds")
    dims_to_squeeze = [name for name in cruft_vars if name in ds.dims]
    coords_to_drop = [
        name for name in cruft_vars if name not in ds.dims and name in ds.coords
    ]
    ds_cleaned = ds.squeeze(dims_to_squeeze, drop=True)
    ds_cleaned = ds_cleaned.reset_coords(coords_to_drop, drop=True)

    # Precipitation is converted from kg m-2 s-1 to mm day-1.
    if "pr" in ds_cleaned.variables:
        if ds_cleaned["pr"].units != "kg m-2 s-1":
            # we want this to fail, as pr units are something we don't expect
            raise ValueError("check units: pr units attribute is not kg m-2 s-1")
        seconds_per_day = 24 * 60 * 60
        ds_cleaned["pr"] = ds_cleaned["pr"] * seconds_per_day
        ds_cleaned["pr"].attrs["units"] = "mm day-1"

    # Cleanup time.
    cal = get_calendar(ds_cleaned)
    needs_conversion = cal == "360_day" or leapday_removal
    if not needs_conversion:
        return ds_cleaned

    # if calendar is just integers, xclim cannot understand it
    if ds_cleaned.time.dtype == "int64":
        ds_cleaned["time"] = xr.decode_cf(ds_cleaned).time

    if cal == "360_day":
        # Cannot have chunks in time dimension for 360 day calendar conversion
        # so loading data into memory.
        ds_cleaned.load()

        # 360 day -> noleap when leap days are removed, 360 day -> standard otherwise.
        new_calendar = "noleap" if leapday_removal else "standard"
        ds_converted = xclim_convert_360day_calendar_interpolate(
            ds=ds_cleaned,
            target=new_calendar,
            align_on="random",
            interpolation="linear",
        )
    else:
        # any -> noleap: remove leap days and update calendar
        ds_converted = xclim_remove_leapdays(ds_cleaned)

    # rechunk, otherwise chunks are different sizes
    chunk_sizes = {
        "time": 730,
        "lat": len(ds_cleaned.lat),
        "lon": len(ds_cleaned.lon),
    }
    return ds_converted.chunk(chunk_sizes)
def _ens_align_datasets(
    datasets: List[Union[xr.Dataset, Path, str, List[Union[Path, str]]]],
    mf_flag: bool = False,
    resample_freq: str = None,
    calendar: str = "default",
    **xr_kwargs,
) -> List[xr.Dataset]:
    """Create a list of aligned xarray Datasets for ensemble Dataset creation.

    Parameters
    ----------
    datasets : List[Union[xr.Dataset, xr.DataArray, Path, str, List[Path, str]]]
      List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of lists where
      each sublist contains input .nc files of an xarray multifile Dataset. DataArrays should have a name so they can be converted to datasets.
    mf_flag : bool
      If True climate simulations are treated as xarray multifile datasets before concatenation.
      Only applicable when datasets is a sequence of file paths.
    resample_freq : Optional[str]
      If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
      If resample_freq is set, the time coordinate of each members will be modified to fit this frequency.
    calendar : str
      The calendar of the time coordinate of the ensemble. For conversions involving '360_day', the align_on='date' option is used.
      See `xclim.core.calendar.convert_calendar`. 'default' is the standard calendar using np.datetime64 objects.
    xr_kwargs :
      Any keyword arguments to be given to xarray when opening the files.

    Returns
    -------
    List[xr.Dataset]

    Raises
    ------
    ValueError
      If `resample_freq` is given and a resampling period of a member holds more than one time step.
    """
    # Defaults applied only when the caller did not choose these options.
    xr_kwargs.setdefault("chunks", "auto")
    xr_kwargs.setdefault("decode_times", False)

    total = len(datasets)
    aligned = []
    for i, n in enumerate(datasets):
        logging.info(f"Accessing {n} of {total}")

        if mf_flag:
            ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
        elif isinstance(n, xr.Dataset):
            ds = n
        elif isinstance(n, xr.DataArray):
            ds = n.to_dataset()
        else:
            ds = xr.open_dataset(n, **xr_kwargs)

        if "time" in ds.coords:
            time = xr.decode_cf(ds).time

            if resample_freq is not None:
                # More than one step in a period means the axis cannot be
                # mapped onto the requested frequency.
                counts = time.resample(time=resample_freq).count()
                if any(counts > 1):
                    raise ValueError(
                        f"Alignment of dataset #{i:02d} failed : its time axis cannot be resampled to freq {resample_freq}."
                    )
                time = counts.time

            ds["time"] = time

            cal = get_calendar(time)
            involves_360_day = "360_day" in [cal, calendar]
            ds = convert_calendar(
                ds,
                calendar,
                align_on="date" if involves_360_day else None,
            )

        aligned.append(ds)

    return aligned
def test_get_calendar_errors(obj):
    """``get_calendar`` must raise a ValueError with the expected message for these objects."""
    expected_message = "Calendar could not be inferred from object"
    with pytest.raises(ValueError, match=expected_message):
        get_calendar(obj)
def day_lengths(
    dates: xr.DataArray,
    lat: xr.DataArray,
    obliquity: float = -0.4091,
    summer_solstice: DayOfYearStr = "06-21",
    start_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    end_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    freq: str = "YS",
) -> xr.DataArray:
    r"""Day-lengths according to latitude, obliquity, and day of year.

    Parameters
    ----------
    dates: xr.DataArray
      Object with a `time` coordinate for which day lengths are computed.
    lat: xarray.DataArray
      Latitude coordinate.
    obliquity: float
      Obliquity of the ecliptic (radians). Default: -0.4091.
    summer_solstice: DayOfYearStr
      Date of summer solstice in northern hemisphere. Used for approximating solar julian dates.
    start_date: xarray.DataArray or DayOfYearStr, optional
      Start date to consider for calculating mean day lengths. Default: None.
    end_date: xarray.DataArray or DayOfYearStr, optional
      End date to consider for calculating mean day lengths. Default: None.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray
      If start and end date provided, returns total sum of daylight-hour between dates at provided frequency.
      If no start and end date provided, returns day-length in hours per individual day.

    Notes
    -----
    Daylight-hours are dependent on latitude, :math:`lat`, the Julian day (solar day) from the summer solstice in the
    Northern hemisphere, :math:`Jday`, and the axial tilt :math:`Axis`, therefore day-length at any latitude for a given
    date on Earth, :math:`dayLength_{lat_{Jday}}`, for a given year in days, :math:`Year`, can be approximated as
    follows:

    .. math::
        dayLength_{lat_{Jday}} = f({lat}, {Jday}) = \frac{\arccos(1-m_{lat_{Jday}})}{\pi} * 24

    Where:

    .. math::
        m_{lat_{Jday}} = f({lat}, {Jday}) = 1 - \tan({Lat}) * \tan \left({Axis}*\cos\left[\frac{2*\pi*{Jday}}{||{Year}||} \right] \right)

    The total sum of daylight hours for a given period between two days (:math:`{Jday} = 0` -> :math:`N`) within a solar
    year then is:

    .. math::
        \sum({SeasonDayLength_{lat}}) = \sum_{Jday=1}^{N} dayLength_{lat_{Jday}}

    References
    ----------
    Modified day-length equations for Huglin heliothermal index published in Hall, A., & Jones, G. V. (2010). Spatial
    analysis of climate in winegrape-growing regions in Australia. Australian Journal of Grape and Wine Research, 16(3),
    389‑404. https://doi.org/10.1111/j.1755-0238.2010.00100.x

    Examples available from Glarner, 2006 (http://www.gandraxa.com/length_of_day.xml).
    """
    cal = get_calendar(dates)

    # Length, in days, of the year each time step belongs to.
    year_length = dates.time.copy(
        data=[days_in_year(x, calendar=cal) for x in dates.time.dt.year])

    # Day of year re-expressed as days since the summer solstice.
    julian_date_from_solstice = dates.time.copy(data=doy_to_days_since(
        dates.time.dt.dayofyear, start=summer_solstice, calendar=cal))

    m_lat_dayofyear = 1 - np.tan(np.radians(lat)) * np.tan(obliquity * (np.cos(
        (2 * np.pi * julian_date_from_solstice) / year_length)))

    day_length_hours = (np.arccos(1 - m_lat_dayofyear) / np.pi) * 24

    # Test against None explicitly: the truth value of a multi-element
    # DataArray bound is ambiguous.
    if start_date is not None and end_date is not None:
        return aggregate_between_dates(day_length_hours,
                                       start=start_date,
                                       end=end_date,
                                       op="sum",
                                       freq=freq)
    return day_length_hours
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End (as day-of-year) dates for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Operator.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data between the start and end dates. If the end date is before the start date, returns np.nan.
      If there is no start and/or end date, returns np.nan.

    Raises
    ------
    ValueError
      If `freq` is not given and cannot be inferred consistently from the bounds.
    """

    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if not isinstance(_bound, str):
            return _bound.sel(time=_base_time) if _base_time in _bound.time else None

        hits = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
        if len(hits) == 0:
            return None
        return (_group.time.isel(time=hits[0]) - _group.time.isel(time=0)).dt.days

    def _as_days_since(_bound, _cal):
        """Convert an array bound to `_cal` and to days-since form; strings pass through."""
        if isinstance(_bound, str):
            return _bound
        _bound = convert_calendar(_bound, _cal)
        _bound.attrs["calendar"] = _cal
        return doy_to_days_since(_bound)

    if freq is None:
        # Infer the frequency from whichever bounds carry a time axis.
        frequencies = []
        for bound in (start, end):
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually.")
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")
    start = _as_days_since(start, cal)
    end = _as_days_since(end, cal)

    pieces = []
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        if start_d is None or end_d is None:
            # Get an array with the good shape, put nans and add the new time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
        else:
            # convert bounds for this group
            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            in_window = (days >= start_d) & (days <= end_d - 1)
            masked = group.where(in_window)
            res = getattr(masked, op)(dim="time", skipna=True)

            invalid = (start_d > end_d) | (start_d.isnull()) | (end_d.isnull())
            res = xr.where(invalid, np.nan, res)
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])

        pieces.append(res)

    return xr.concat(pieces, dim="time")
def doymin(da: xr.DataArray) -> xr.DataArray:
    """Return the day of year of the minimum value."""
    # Position of the minimum along time, used to index the day-of-year values.
    min_index = da.argmin(dim="time")
    day_of_year = da.time.dt.dayofyear
    result = day_of_year[min_index]

    metadata = dict(units="", is_dayofyear=1, calendar=get_calendar(da))
    result.attrs.update(metadata)
    return result
def prepare(self, da, freq, src_timestep, **indexer):
    """Prepare arrays to be fed to the `is_missing` function.

    Parameters
    ----------
    da : xr.DataArray
      Input data.
    freq : str
      Resampling frequency defining the periods defined in
      http://pandas.pydata.org/pandas-docs/stable/timeseries.html#resampling.
    src_timestep : {"D", "H"}
      Expected input frequency.
    **indexer : {dim: indexer, }, optional
      Time attribute and values over which to subset the array. For example, use season='DJF' to select winter
      values, month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given,
      all values are considered.

    Returns
    -------
    xr.DataArray, xr.DataArray
      Boolean array indicating which values are null, array of expected number of valid values.

    Notes
    -----
    If `freq=None` and an indexer is given, then missing values during period at the start or end of array won't be
    flagged.
    """
    # This function can probably be made simpler once CFPeriodIndex is implemented.
    null = self.is_null(da, freq, **indexer)

    pfreq, _anchor = self.split_freq(freq)

    c = null.sum(dim="time")

    # Work out the start and end of each period from the aggregated time index.
    if pfreq.endswith("S"):
        # Time index taken as period starts; ends are one `freq` later.
        period_start = c.indexes["time"]
        period_end = period_start.shift(1, freq=freq)
    elif pfreq:
        # Time index taken as period ends; starts are one `freq` earlier.
        period_end = c.indexes["time"]
        period_start = period_end.shift(-1, freq=freq)
    else:
        # No resampling frequency: a single period spanning the whole series.
        full_index = da.time.to_index()
        period_start = full_index[:1]
        period_end = full_index[-1:]

    if not indexer:
        # Simply use the start and end dates to find the expected number of steps.
        span = period_end - period_start
        n = span.astype(_np_timedelta64[src_timestep])

        if freq:
            count = xr.DataArray(n.values, coords={"time": c.time}, dims="time")
        else:
            count = xr.DataArray(n.values[0] + 1)
        return null, count

    # Create a full synthetic time series and compare the number of days with the original series.
    timeline = date_range(
        period_start[0],
        period_end[-1],
        freq=src_timestep,
        calendar=get_calendar(da),
    )
    synthetic = xr.DataArray(
        data=np.ones(len(timeline)), coords={"time": timeline}, dims=("time", )
    )
    selected = generic.select_time(synthetic, **indexer)
    if freq:
        count = selected.notnull().resample(time=freq).sum(dim="time")
    else:
        count = selected.notnull().sum(dim="time")

    return null, count
def test_get_calendar_nonxr(obj, cal):
    """``get_calendar`` must return the expected calendar for non-xarray objects."""
    inferred = get_calendar(obj)
    assert inferred == cal