Example #1
    def train(
        self,
        ref: DataArray,
        hist: DataArray,
    ):
        """Train the adjustment object. Refer to the class documentation for the algorithm details.

        Parameters
        ----------
        ref : DataArray
          Training target, usually a reference time series drawn from observations.
        hist : DataArray
          Training data, usually a model output whose biases are to be adjusted.
        """
        if self._trained:
            warn("train() was already called, overwriting old results.")

        if hasattr(self, "group"):
            # Right now there is no other way of getting the main adjustment dimension
            _raise_on_multiple_chunk(ref, self.group.dim)
            _raise_on_multiple_chunk(hist, self.group.dim)

            if self.group.prop == "dayofyear" and get_calendar(
                    ref) != get_calendar(hist):
                warn(
                    ("Input ref and hist are defined on different calendars, "
                     "this is not recommended when using 'dayofyear' grouping "
                     "and could give strange results. See `xclim.core.calendar` "
                     "for tools to convert your data to a common calendar."),
                    stacklevel=4,
                )

        self["hist_calendar"] = get_calendar(hist)
        self._train(ref, hist)
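A minimal usage sketch of this instance-method style (treating the class as xclim's pre-0.27 sdba adjustment API is an assumption; EmpiricalQuantileMapping stands in for a concrete subclass, and the data is synthetic):

import numpy as np
import xarray as xr
from xclim import sdba  # assumption about the enclosing package

t = xr.cftime_range("2000-01-01", periods=3 * 365, freq="D", calendar="noleap")
ref = xr.DataArray(20 + 5 * np.random.rand(t.size), dims="time",
                   coords={"time": t}, attrs={"units": "degC"})
hist = (ref + 2).assign_attrs(units="degC")  # uniformly biased synthetic "model"

ADJ = sdba.EmpiricalQuantileMapping(group="time.month")  # old-style constructor (assumed)
ADJ.train(ref, hist)
scen = ADJ.adjust(hist)  # bias-adjusted series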
Example #2
    def train(cls, ref: DataArray, hist: DataArray, **kwargs):
        """Train the adjustment object. Refer to the class documentation for the algorithm details.

        Parameters
        ----------
        ref : DataArray
          Training target, usually a reference time series drawn from observations.
        hist : DataArray
          Training data, usually a model output whose biases are to be adjusted.
        """
        kwargs = parse_group(cls._train, kwargs)
        skip_checks = kwargs.pop("skip_input_checks", False)

        if not skip_checks:
            (ref, hist), train_units = cls._harmonize_units(ref, hist)

            if "group" in kwargs:
                cls._check_inputs(ref, hist, group=kwargs["group"])

            hist = convert_units_to(hist, ref)
        else:
            train_units = ""

        ds, params = cls._train(ref, hist, **kwargs)
        obj = cls(
            _trained=True,
            hist_calendar=get_calendar(hist),
            train_units=train_units,
            **params,
        )
        obj.set_dataset(ds)
        return obj
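The classmethod variant returns a trained object directly. A sketch using xclim's EmpiricalQuantileMapping (picking that subclass is an assumption, but the pattern matches the post-0.27 sdba API):

import numpy as np
import xarray as xr
from xclim import sdba

t = xr.cftime_range("2000-01-01", periods=3 * 365, freq="D", calendar="noleap")
ref = xr.DataArray(20 + 5 * np.random.rand(t.size), dims="time",
                   coords={"time": t}, attrs={"units": "degC"})
hist = (ref + 2).assign_attrs(units="degC")  # synthetic biased "model"

ADJ = sdba.EmpiricalQuantileMapping.train(ref, hist, nquantiles=20, group="time.month")
scen = ADJ.adjust(hist)  # apply the learned quantile mapping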
Example #3
    def get_coordinate(self, ds=None):
        """Return the coordinate as in the output of group.apply.

        Currently, only implemented for groupings with prop == month or dayofyear.
        For prop == dayfofyear, a ds (Dataset or DataArray) can be passed to infer
        the max doy from the available years and calendar.
        """
        if self.prop == "month":
            return xr.DataArray(np.arange(1, 13),
                                dims=("month", ),
                                name="month")
        if self.prop == "season":
            return xr.DataArray(["DJF", "MAM", "JJA", "SON"],
                                dims=("season", ),
                                name="season")
        if self.prop == "dayofyear":
            if ds is not None:
                cal = get_calendar(ds, dim=self.dim)
                mdoy = max(
                    days_in_year(yr, cal)
                    for yr in np.unique(ds[self.dim].dt.year))
            else:
                mdoy = 365
            return xr.DataArray(np.arange(1, mdoy + 1),
                                dims=("dayofyear", ),
                                name="dayofyear")
        if self.prop == "group":
            return xr.DataArray([1], dims=("group", ), name="group")
        # TODO: what happens when there is no group (prop is None)?
        raise NotImplementedError()
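A quick look at the returned coordinate, assuming this method lives on xclim.sdba.base.Grouper:

from xclim.sdba.base import Grouper  # assumed location of the enclosing class

g = Grouper("time.month")
coord = g.get_coordinate()
print(coord.values)  # [ 1  2  3  4  5  6  7  8  9 10 11 12]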
Example #4
    def adjust(self, sim: DataArray, **kwargs):
        """Return bias-adjusted data. Refer to the class documentation for the algorithm details.

        Parameters
        ----------
        sim : DataArray
          Time series to be bias-adjusted, usually a model output.
        kwargs :
          Algorithm-specific keyword arguments, see class doc.
        """
        if not self._trained:
            raise ValueError("train() must be called before adjusting.")

        if hasattr(self, "group"):
            # Right now there is no other way of getting the main adjustment dimension
            _raise_on_multiple_chunk(sim, self.group.dim)

            if (self.group.prop == "dayofyear"
                    and get_calendar(sim) != self.hist_calendar):
                warn(
                    ("This adjustment was trained on a simulation with the "
                     f"{self._hist_calendar} calendar but the sim input uses "
                     f"{get_calendar(sim)}. This is not recommended with dayofyear "
                     "grouping and could give strange results."),
                    stacklevel=4,
                )

        scen = self._adjust(sim, **kwargs)
        params = ", ".join([f"{k}={repr(v)}" for k, v in kwargs.items()])
        scen.attrs["xclim_history"] = update_history(
            f"Bias-adjusted with {str(self)}.adjust(sim, {params})", sim)
        return scen
Example #5
def test_convert_calendar(source, target, target_as_str, freq):
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-31", freq=freq, calendar=source),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(np.linspace(0, 1, src.size),
                          dims=("time", ),
                          coords={"time": src})
    tgt = xr.DataArray(
        date_range("2004-01-01", "2004-12-31", freq=freq, calendar=target),
        dims=("time", ),
        name="time",
    )

    conv = convert_calendar(da_src, target if target_as_str else tgt)

    assert get_calendar(conv) == target

    if target_as_str and max_doy[source] < max_doy[target]:
        assert conv.size == src.size
    elif not target_as_str:
        assert conv.size == tgt.size

        assert conv.isnull().sum() == max(max_doy[target] - max_doy[source], 0)
Example #6
def snd_max_doy(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray:
    """Maximum snow depth day of year.

    Day of year when surface snow reaches its peak value. If the snow depth is 0 over the entire period, returns NaN.

    Parameters
    ----------
    snd : xarray.DataArray
      Surface snow depth.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray
      The day of year at which snow depth reaches its maximum value.
    """
    from xclim.core.missing import at_least_n_valid

    # Flag periods with no positive (non-null) snow depth; the missing-values
    # check returns True where a period has none.
    valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq)

    # Compute doymax. Will return first time step if all snow depths are 0.
    out = generic.select_resample_op(snd, op=generic.doymax, freq=freq)
    out.attrs.update(units="", is_dayofyear=1, calendar=get_calendar(snd))

    # Mask the periods flagged above, i.e. those without any positive snow depth.
    return out.where(~valid)
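A usage sketch with synthetic snow-depth data (it assumes the function above and its xclim imports are in scope; the sinusoid is arbitrary):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-07-01", "2002-06-30", freq="D")
snd = xr.DataArray(
    np.clip(np.sin(np.linspace(0, 4 * np.pi, time.size)), 0, None),
    coords={"time": time}, dims="time", attrs={"units": "m"},
)
print(snd_max_doy(snd).values)  # one day-of-year value per AS-JUL year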
Example #7
def test_convert_calendar_360_days(source, target, freq, align_on):
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(np.linspace(0, 1, src.size),
                          dims=("time", ),
                          coords={"time": src})

    conv = convert_calendar(da_src, target, align_on=align_on)

    assert get_calendar(conv) == target

    if align_on == "date":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
        )
    elif target == "360_day":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
        )
    else:
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
        )
    if source == "360_day" and align_on == "year":
        assert conv.size == 360 if freq == "D" else 360 * 4
    else:
        assert conv.size == 359 if freq == "D" else 359 * 4
Example #8
def doymin(da: xr.DataArray) -> xr.DataArray:
    """Return the day of year of the minimum value."""
    i = da.argmin(dim="time")
    out = da.time.dt.dayofyear.isel(time=i, drop=True)
    out.attrs.update(units="",
                     is_dayofyear=np.int32(1),
                     calendar=get_calendar(da))
    return out
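A minimal check of doymin (assuming get_calendar is in scope, as in the snippet's module; the cosine is synthetic and dips mid-year):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2000-12-31", freq="D")
da = xr.DataArray(np.cos(np.linspace(0, 2 * np.pi, time.size)),
                  coords={"time": time}, dims="time")
print(int(doymin(da)))  # ~183: the cosine's minimum falls mid-year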
Example #9
def test_convert_calendar_360_days_random():
    da_std = xr.DataArray(
        np.linspace(0, 1, 366 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default"
            )
        },
    )
    da_360 = xr.DataArray(
        np.linspace(0, 1, 360 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day"
            )
        },
    )

    conv = convert_calendar(da_std, "360_day", align_on="random")
    assert get_calendar(conv) == "360_day"
    assert conv.size == 720
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    conv = convert_calendar(da_360, "default", align_on="random")
    assert get_calendar(conv) == "default"
    assert conv.size == 720
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "default", align_on="random")
    assert (conv2 != conv).any()

    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.NaN)
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[::2]
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))
Example #10
def unpack_moving_yearly_window(da: xr.DataArray, dim: str = "movingwin"):
    """Unpack a constructed moving window dataset to a normal timeseries, only keeping the central data.

    Unpack DataArrays created with :py:func:`construct_moving_yearly_window` and recreate a normal timeseries.
    Only the central non-overlapping years are kept. The final timeseries will be (window - step) years shorter than
    the initial one.

    The window length and window step are inferred from the coordinates.

    Parameters
    ----------
    da : xr.DataArray
      As constructed by :py:func:`construct_moving_yearly_window`.
    dim : str
      The window dimension name as given to the construction function.
    """
    # Get number of samples by year (and perform checks)
    N_in_year = _get_number_of_elements_by_year(da.time)

    # Might be smaller than the original moving window, doesn't matter
    window = da.time.size / N_in_year

    if window % 1 != 0:
        warnings.warn(
            f"Incomplete data received: the number of years covered is not an integer ({window})."
        )

    # Get step in number of years
    days_in_year = max_doy[get_calendar(da)]
    step = np.unique(da[dim].diff(dim).dt.days / days_in_year)
    if len(step) > 1:
        raise ValueError("The spacing between the windows is not equal.")
    step = int(step[0])

    # Which years to keep: length step, in the middle of window
    left = int((window - step) // 2)  # first year to keep

    # Keep only the middle years
    da = da.isel(time=slice(left * N_in_year, (left + step) * N_in_year))

    out = []
    for win_start in da[dim]:
        slc = da.sel({dim: win_start}).drop_vars(dim)
        dt = win_start.values - da[dim][0].values
        slc["time"] = slc.time + dt
        out.append(slc)

    return xr.concat(out, "time")
Example #11
    def _check_inputs(cls, *inputs, group):
        """
        Raises an error if there are chunks along the main dimension.
        Also raises if cls._allow_diff_calendars is False and calendars differ.
        """
        for inda in inputs:
            if uses_dask(inda) and len(inda.chunks[inda.get_axis_num(group.dim)]) > 1:
                raise ValueError(
                    f"Multiple chunks along the main adjustment dimension {group.dim} is not supported."
                )

        # All calendars used by the inputs
        calendars = {get_calendar(inda, group.dim) for inda in inputs}
        if not cls._allow_diff_calendars and len(calendars) > 1:
            raise ValueError(
                "Inputs are defined on different calendars,"
                f" this is not supported for {cls.__name__} adjustment."
            )

        # Check multivariate dimensions
        mvcrds = []
        for inda in inputs:
            for crd in inda.coords.values():
                if crd.attrs.get("is_variables", False):
                    mvcrds.append(crd)
        if mvcrds and (
            not all(mvcrds[0].equals(mv) for mv in mvcrds[1:])
            or len(mvcrds) != len(inputs)
        ):
            raise ValueError(
                "Inputs have different multivariate coordinates "
                f"({set(mv.name for mv in mvcrds)})."
            )

        if group.prop == "dayofyear" and (
            "default" in calendars or "standard" in calendars
        ):
            warn(
                "Strange results could be returned when using dayofyear grouping "
                "on data defined in the proleptic_gregorian calendar."
            )
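The chunking rule enforced above can be demonstrated standalone with dask-backed data (dask must be installed; no adjustment class is needed to show the check itself):

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(730.0), dims="time").chunk({"time": 365})
# Two chunks along "time": _check_inputs would raise for group.dim == "time".
print(len(da.chunks[da.get_axis_num("time")]) > 1)  # True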
Example #12
def test_interp_calendar(source, target):
    src = xr.DataArray(
        date_range("2004-01-01", "2004-07-30", freq="D", calendar=source),
        dims=("time", ),
        name="time",
    )
    tgt = xr.DataArray(
        date_range("2004-01-01", "2004-07-30", freq="D", calendar=target),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(np.linspace(0, 1, src.size),
                          dims=("time", ),
                          coords={"time": src})
    conv = interp_calendar(da_src, tgt)

    assert conv.size == tgt.size
    assert get_calendar(conv) == target

    np.testing.assert_almost_equal(conv.max(), 1, 2)
    assert conv.min() == 0
Example #13
    def adjust(self, sim: DataArray, **kwargs):
        """Return bias-adjusted data. Refer to the class documentation for the algorithm details.

        Parameters
        ----------
        sim : DataArray
          Time series to be bias-adjusted, usually a model output.
        """
        if not self.__trained:
            raise ValueError("train() must be called before adjusting.")
        if (hasattr(self, "group") and self.group.prop == "dayofyear"
                and get_calendar(sim) != self._hist_calendar):
            warn(
                ("This adjustment was trained on a simulation with the "
                 f"{self._hist_calendar} calendar but the sim input uses "
                 f"{get_calendar(sim)}. This is not recommended with dayofyear "
                 "grouping and could give strange results."),
                stacklevel=4,
            )
        scen = self._adjust(sim, **kwargs)
        scen.attrs["bias_adjusted"] = True
        return scen
Example #14
def _get_number_of_elements_by_year(time):
    """Get the number of elements in time in a year by inferring its sampling frequency.

    Only calendars with uniform year lengths are supported: 360_day, noleap, all_leap.
    """
    cal = get_calendar(time)

    # Calendar check
    if cal in ["standard", "gregorian", "default", "proleptic_gregorian"]:
        raise ValueError(
            "For moving window computations, the data must have a uniform calendar (360_day, noleap or all_leap)."
        )

    mult, freq, _, _ = parse_offset(xr.infer_freq(time))
    days_in_year = max_doy[cal]
    elements_in_year = {"Q": 4, "M": 12, "D": days_in_year, "H": days_in_year * 24}
    N_in_year = elements_in_year.get(freq, 1) / mult
    if N_in_year % 1 != 0:
        raise ValueError(
            f"Sampling frequency of the data must be Q, M, D or H and evenly divide a year (got {mult}{freq})."
        )

    return int(N_in_year)
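A sketch of the inference (assuming the helper above and its xclim imports, get_calendar, parse_offset and max_doy, are in scope):

import xarray as xr

t = xr.cftime_range("2000-01-01", periods=2 * 365, freq="D", calendar="noleap")
time = xr.DataArray(t, dims="time", name="time")
print(_get_number_of_elements_by_year(time))  # 365 daily elements per noleap year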
Example #15
def test_ensure_cftime_array(inp, calout):
    out = ensure_cftime_array(inp)
    assert get_calendar(out) == calout
Example #16
def test_get_calendar(file, cal, maxdoy):
    with open_dataset(os.path.join(*file)) as ds:
        out_cal = get_calendar(ds)
        assert cal == out_cal
        assert max_doy[cal] == maxdoy
Example #17
def xclim_convert_360day_calendar_interpolate(
    ds,
    target="noleap",
    align_on="random",
    interpolation="linear",
    return_indices=False,
    ignore_nans=True,
):
    """
    Parameters
    ----------
    ds : xr.Dataset
    target : str
        see xclim.core.calendar.convert_calendar
    align_on : str
        this determines which days in the calendar will have missing values or will be the product of interpolation, if there is.
        It could be every year the same calendar days, or the days could randomly change. see xclim.core.calendar.convert_calendar
    interpolation : None or str
        passed to xr.Dataset.interpolate_na if not None
    return_indices : bool
        on top of the converted dataset, return a list of the array indices identifying values that were inserted.
        This assumes there were no NaNs before conversion.
    ignore_nans : bool
        if False and there are any NaNs in `ds` variables, an assertion error will be raised. NaNs are ignored otherwise.
    Returns
    -------
    tuple(xr.Dataset, xr.Dataset) if return_indices is True, xr.Dataset otherwise.

    Notes
    -----
    The default values of `target`, `align_on` and `interpolation` make this function's default behaviour equivalent
    to the LOCA calendar conversion [1] for conversions from 360-day calendars to noleap calendars. In that approach,
    5 calendar days are added to each year (noleap calendars always have 365 days), but those added days are not
    necessarily the ones whose values end up interpolated. The days to interpolate are picked randomly within each
    block of 72 days, so they change from year to year.

    [1] http://loca.ucsd.edu/loca-calendar/
    """

    if get_calendar(ds) != "360_day":
        raise ValueError(
            "tried to use 360 day calendar conversion for a non-360-day calendar dataset"
        )

    if not ignore_nans:
        for var in ds:
            assert (
                ds[var].isnull().sum() == 0
            ), "360-day calendar conversion with interpolation: there are NaNs!"

    ds_converted = convert_calendar(
        ds, target=target, align_on=align_on, missing=np.NaN
    )

    if interpolation:
        ds_out = ds_converted.interpolate_na("time", interpolation)
    else:
        ds_out = ds_converted

    if return_indices:
        # ds_converted still holds NaNs at the inserted days (pre-interpolation),
        # so isnull() identifies exactly the inserted values.
        return (ds_out, ds_converted.isnull())
    else:
        return ds_out
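A usage sketch with a synthetic 360_day dataset (assumes the function above and xclim.core.calendar.convert_calendar are importable):

import numpy as np
import xarray as xr

t = xr.cftime_range("2000-01-01", periods=360, freq="D", calendar="360_day")
ds = xr.Dataset({"tas": ("time", np.linspace(250.0, 300.0, t.size))},
                coords={"time": t})

out = xclim_convert_360day_calendar_interpolate(ds)  # defaults: noleap target, linear interpolation
assert out.time.size == 365  # 5 days inserted into the year, values interpolated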
Example #18
def standardize_gcm(ds, leapday_removal=True):
    """Standardize a GCM dataset: drop cruft variables, convert precipitation units and harmonize the calendar.

    360-day calendar conversion requires that there are no chunks in
    the 'time' dimension of `ds`.

    Parameters
    ----------
    ds : xr.Dataset
    leapday_removal : bool, optional
      If True, convert the calendar to noleap (i.e. remove leap days).

    Returns
    -------
    xr.Dataset
    """
    # Remove cruft coordinates, variables, dims.
    cruft_vars = ("height", "member_id", "time_bnds")

    dims_to_squeeze = []
    coords_to_drop = []
    for v in cruft_vars:
        if v in ds.dims:
            dims_to_squeeze.append(v)
        elif v in ds.coords:
            coords_to_drop.append(v)

    ds_cleaned = ds.squeeze(dims_to_squeeze, drop=True).reset_coords(
        coords_to_drop, drop=True
    )

    # Cleanup time.

    # if variable is precip, need to update units to mm day-1
    if "pr" in ds_cleaned.variables:
        # units should be kg/m2/s in CMIP6 output
        if ds_cleaned["pr"].units == "kg m-2 s-1":
            # convert to mm/day
            mmday_conversion = 24 * 60 * 60
            ds_cleaned["pr"] = ds_cleaned["pr"] * mmday_conversion
            # update units attribute
            ds_cleaned["pr"].attrs["units"] = "mm day-1"
        else:
            # we want this to fail, as pr units are something we don't expect
            raise ValueError("check units: pr units attribute is not kg m-2 s-1")

    cal = get_calendar(ds_cleaned)

    if (
        cal == "360_day" or leapday_removal
    ):  # calendar conversion is necessary in either case
        # if calendar is just integers, xclim cannot understand it
        if ds_cleaned.time.dtype == "int64":
            ds_cleaned["time"] = xr.decode_cf(ds_cleaned).time
        if cal == "360_day":

            # Cannot have chunks in time dimension for 360 day calendar conversion so loading
            # data into memory.
            ds_cleaned.load()

            if leapday_removal:  # 360 day -> noleap
                ds_converted = xclim_convert_360day_calendar_interpolate(
                    ds=ds_cleaned,
                    target="noleap",
                    align_on="random",
                    interpolation="linear",
                )
            else:  # 360 day -> standard
                ds_converted = xclim_convert_360day_calendar_interpolate(
                    ds=ds_cleaned,
                    target="standard",
                    align_on="random",
                    interpolation="linear",
                )
        else:  # any -> noleap
            # remove leap days and update calendar
            ds_converted = xclim_remove_leapdays(ds_cleaned)

        # rechunk, otherwise chunks are different sizes
        ds_out = ds_converted.chunk(
            {"time": 730, "lat": len(ds_cleaned.lat), "lon": len(ds_cleaned.lon)}
        )

    else:
        ds_out = ds_cleaned

    return ds_out
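A sketch with a CMIP-like 360_day dataset (assumes the function above, its helpers, and dask for the final rechunk; the values are synthetic):

import numpy as np
import xarray as xr

t = xr.cftime_range("2000-01-01", periods=360, freq="D", calendar="360_day")
ds = xr.Dataset(
    {"pr": (("time", "lat", "lon"), np.full((t.size, 2, 2), 2.0e-5))},
    coords={"time": t, "lat": [0.0, 1.0], "lon": [0.0, 1.0]},
)
ds["pr"].attrs["units"] = "kg m-2 s-1"

out = standardize_gcm(ds)  # pr converted to mm day-1, calendar converted to noleap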
Example #19
def _ens_align_datasets(
    datasets: List[Union[xr.Dataset, Path, str, List[Union[Path, str]]]],
    mf_flag: bool = False,
    resample_freq: Optional[str] = None,
    calendar: str = "default",
    **xr_kwargs,
) -> List[xr.Dataset]:
    """Create a list of aligned xarray Datasets for ensemble Dataset creation.

    Parameters
    ----------
    datasets : List[Union[xr.Dataset, xr.DataArray, Path, str, List[Union[Path, str]]]]
      List of netcdf file paths or xarray Dataset/DataArray objects. If mf_flag is True, `datasets` should be a list
      of lists where each sublist contains the input .nc files of an xarray multifile Dataset. DataArrays should have
      a name so they can be converted to datasets.
    mf_flag : bool
      If True climate simulations are treated as xarray multifile datasets before concatenation.
      Only applicable when datasets is a sequence of file paths.
    resample_freq : Optional[str]
      If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
      If resample_freq is set, the time coordinate of each member will be modified to fit this frequency.
    calendar : str
      The calendar of the time coordinate of the ensemble. For conversions involving '360_day', the align_on='date' option is used.
      See `xclim.core.calendar.convert_calendar`. 'default' is the standard calendar using np.datetime64 objects.
    xr_kwargs :
      Any keyword arguments to be given to xarray when opening the files.

    Returns
    -------
    List[xr.Dataset]
    """
    xr_kwargs.setdefault("chunks", "auto")
    xr_kwargs.setdefault("decode_times", False)

    ds_all = []
    for i, n in enumerate(datasets):
        logging.info(f"Accessing {n} of {len(datasets)}")
        if mf_flag:
            ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
        else:
            if isinstance(n, xr.Dataset):
                ds = n
            elif isinstance(n, xr.DataArray):
                ds = n.to_dataset()
            else:
                ds = xr.open_dataset(n, **xr_kwargs)

        if "time" in ds.coords:
            time = xr.decode_cf(ds).time

            if resample_freq is not None:
                counts = time.resample(time=resample_freq).count()
                if any(counts > 1):
                    raise ValueError(
                        f"Alignment of dataset #{i:02d} failed : its time axis cannot be resampled to freq {resample_freq}."
                    )
                time = counts.time

            ds["time"] = time

            cal = get_calendar(time)
            ds = convert_calendar(
                ds,
                calendar,
                align_on="date" if "360_day" in [cal, calendar] else None,
            )

        ds_all.append(ds)

    return ds_all
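A sketch aligning two in-memory members with different calendars (calling the private helper directly is for illustration only):

import numpy as np
import xarray as xr

t1 = xr.cftime_range("2000-01-01", periods=365, freq="D", calendar="noleap")
t2 = xr.cftime_range("2000-01-01", periods=366, freq="D", calendar="all_leap")
ds1 = xr.Dataset({"tas": ("time", np.zeros(t1.size))}, coords={"time": t1})
ds2 = xr.Dataset({"tas": ("time", np.ones(t2.size))}, coords={"time": t2})

aligned = _ens_align_datasets([ds1, ds2], calendar="noleap")
assert all(member.time.size == 365 for member in aligned)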
Example #20
def test_get_calendar_errors(obj):
    with pytest.raises(ValueError, match="Calendar could not be inferred from object"):
        get_calendar(obj)
Example #21
def day_lengths(
    dates: xr.DataArray,
    lat: xr.DataArray,
    obliquity: float = -0.4091,
    summer_solstice: DayOfYearStr = "06-21",
    start_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    end_date: Optional[Union[xarray.DataArray, DayOfYearStr]] = None,
    freq: str = "YS",
) -> xr.DataArray:
    r"""Day-lengths according to latitude, obliquity, and day of year.

    Parameters
    ----------
    dates: xr.DataArray
      Time coordinate of the data.
    lat: xarray.DataArray
      Latitude coordinate.
    obliquity: float
      Obliquity of the ecliptic (radians). Default: -0.4091.
    summer_solstice: DayOfYearStr
      Date of summer solstice in northern hemisphere. Used for approximating solar julian dates.
    start_date: xarray.DataArray or DayOfYearStr, optional
      Start date to consider for calculating mean day lengths. Default: None.
    end_date: xarray.DataArray or DayOfYearStr, optional
      End date to consider for calculating mean day lengths. Default: None.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray
      If start and end date provided, returns total sum of daylight-hour between dates at provided frequency.
      If no start and end date provided, returns day-length in hours per individual day.

    Notes
    -----
    Daylight-hours are dependent on latitude, :math:`lat`, the Julian day (solar day) from the summer solstice in the
    Northern hemisphere, :math:`Jday`, and the axial tilt :math:`Axis`, therefore day-length at any latitude for a given
    date on Earth, :math:`dayLength_{lat_{Jday}}`, for a given year in days, :math:`Year`, can be approximated as
    follows:

    .. math::
        dayLength_{lat_{Jday}} = f({lat}, {Jday}) = \frac{\arccos(1-m_{lat_{Jday}})}{\pi} * 24

    Where:

    .. math::
        m_{lat_{Jday}} = f({lat}, {Jday}) = 1 - \tan({Lat}) * \tan \left({Axis}*\cos\left[\frac{2*\pi*{Jday}}{||{Year}||} \right] \right)

    The total sum of daylight hours for a given period between two days (:math:`{Jday} = 0` -> :math:`N`) within a solar
    year then is:

    .. math::
        \sum({SeasonDayLength_{lat}}) = \sum_{Jday=1}^{N} dayLength_{lat_{Jday}}

    References
    ----------
    Modified day-length equations for Huglin heliothermal index published in Hall, A., & Jones, G. V. (2010). Spatial
    analysis of climate in winegrape-growing regions in Australia. Australian Journal of Grape and Wine Research, 16(3),
    389‑404. https://doi.org/10.1111/j.1755-0238.2010.00100.x

    Examples available from Glarner, 2006 (http://www.gandraxa.com/length_of_day.xml).
    """
    cal = get_calendar(dates)

    year_length = dates.time.copy(
        data=[days_in_year(x, calendar=cal) for x in dates.time.dt.year])

    julian_date_from_solstice = dates.time.copy(data=doy_to_days_since(
        dates.time.dt.dayofyear, start=summer_solstice, calendar=cal))

    m_lat_dayofyear = 1 - np.tan(np.radians(lat)) * np.tan(obliquity * (np.cos(
        (2 * np.pi * julian_date_from_solstice) / year_length)))

    day_length_hours = (np.arccos(1 - m_lat_dayofyear) / np.pi) * 24

    if start_date and end_date:
        return aggregate_between_dates(day_length_hours,
                                       start=start_date,
                                       end=end_date,
                                       op="sum",
                                       freq=freq)
    else:
        return day_length_hours
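A sketch computing per-day daylight hours at 45°N (assumes the function above and its xclim helpers, days_in_year and doy_to_days_since, are in scope):

import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2000-12-31", freq="D")
dates = xr.DataArray(time, coords={"time": time}, dims="time")
lat = xr.DataArray([45.0], coords={"lat": [45.0]}, dims="lat")

dl = day_lengths(dates, lat)  # hours of daylight per day; ~15.4 h near the June solstice at 45°N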
Example #22
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End (as day-of-year) dates for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Operator.
    freq : str, optional
      Resampling frequency. If None, it is inferred from the time coordinates of the start/end arrays.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data between the start and end dates. If the end date is before the start date, returns np.nan.
      If there is no start and/or end date, returns np.nan.
    """
    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if isinstance(_bound, str):
            b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
            if not len(b_i):
                return None
            return (_group.time.isel(time=b_i[0]) -
                    _group.time.isel(time=0)).dt.days
        if _base_time in _bound.time:
            return _bound.sel(time=_base_time)
        return None

    if freq is None:
        frequencies = []
        for i, bound in enumerate([start, end], start=1):
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually.")
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")

    if not isinstance(start, str):
        start = convert_calendar(start, cal)
        start.attrs["calendar"] = cal
        start = doy_to_days_since(start)
    if not isinstance(end, str):
        end = convert_calendar(end, cal)
        end.attrs["calendar"] = cal
        end = doy_to_days_since(end)

    out = list()
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        # convert bounds for this group
        if start_d is not None and end_d is not None:

            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())),
                np.nan, res)
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])
            out.append(res)
        else:
            # Get an array with the good shape, put nans and add the new time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
            out.append(res)
            continue

    out = xr.concat(out, dim="time")
    return out
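A sketch with fixed day-of-year bounds (when start and end are strings their frequency cannot be inferred, so freq must be given explicitly; assumes the function above and its xclim helpers are in scope):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
da = xr.DataArray(np.ones(time.size), coords={"time": time}, dims="time")

out = aggregate_between_dates(da, start="04-01", end="09-30", op="sum", freq="YS")
# 182 ones per year: April 1 up to, but excluding, September 30 (note the `end_d - 1` mask)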
Example #23
def doymin(da: xr.DataArray) -> xr.DataArray:
    """Return the day of year of the minimum value."""
    i = da.argmin(dim="time")
    out = da.time.dt.dayofyear[i]
    out.attrs.update(units="", is_dayofyear=1, calendar=get_calendar(da))
    return out
Example #24
    def prepare(self, da, freq, src_timestep, **indexer):
        """Prepare arrays to be fed to the `is_missing` function.

        Parameters
        ----------
        da : xr.DataArray
          Input data.
        freq : str
          Resampling frequency defining the periods defined in
          http://pandas.pydata.org/pandas-docs/stable/timeseries.html#resampling.
        src_timestep : {"D", "H"}
          Expected input frequency.
        **indexer : {dim: indexer, }, optional
          Time attribute and values over which to subset the array. For example, use season='DJF' to select winter
          values, month=1 to select January, or month=[6,7,8] to select summer months. If no indexer is given,
          all values are considered.

        Returns
        -------
        xr.DataArray, xr.DataArray
          Boolean array indicating which values are null, array of expected number of valid values.

        Notes
        -----
        If `freq=None` and an indexer is given, then missing values during periods at the start or end of the array
        won't be flagged.
        """
        # This function can probably be made simpler once CFPeriodIndex is implemented.
        null = self.is_null(da, freq, **indexer)

        pfreq, anchor = self.split_freq(freq)

        c = null.sum(dim="time")

        # Use the start and end dates to find the expected number of days.
        if pfreq.endswith("S"):
            start_time = c.indexes["time"]
            end_time = start_time.shift(1, freq=freq)
        elif pfreq:
            end_time = c.indexes["time"]
            start_time = end_time.shift(-1, freq=freq)
        else:
            i = da.time.to_index()
            start_time = i[:1]
            end_time = i[-1:]

        if indexer:
            # Create a full synthetic time series and compare the number of days with the original series.
            t = date_range(
                start_time[0],
                end_time[-1],
                freq=src_timestep,
                calendar=get_calendar(da),
            )

            sda = xr.DataArray(data=np.ones(len(t)),
                               coords={"time": t},
                               dims=("time", ))
            st = generic.select_time(sda, **indexer)
            if freq:
                count = st.notnull().resample(time=freq).sum(dim="time")
            else:
                count = st.notnull().sum(dim="time")

        else:
            delta = end_time - start_time
            n = delta.astype(_np_timedelta64[src_timestep])

            if freq:
                count = xr.DataArray(n.values,
                                     coords={"time": c.time},
                                     dims="time")
            else:
                count = xr.DataArray(n.values[0] + 1)

        return null, count
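In practice this machinery is driven through the module's public wrappers; a sketch assuming xclim.core.missing.missing_any exists with this behavior:

import numpy as np
import pandas as pd
import xarray as xr
from xclim.core.missing import missing_any  # assumed public wrapper

time = pd.date_range("2000-01-01", "2000-12-31", freq="D")
da = xr.DataArray(np.ones(time.size), coords={"time": time}, dims="time")
da[5] = np.nan  # knock out one day in January

mask = missing_any(da, freq="MS")
print(mask.values)  # True for January, False for the other months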
Example #25
def test_get_calendar_nonxr(obj, cal):
    assert get_calendar(obj) == cal