Example #1
    def test_calendars(self):
        # generate test DataArray
        time_std = date_range("1991-07-01", "1993-06-30", freq="D", calendar="standard")
        time_365 = date_range("1991-07-01", "1993-06-30", freq="D", calendar="noleap")
        data_std = xr.DataArray(
            np.ones((time_std.size, 4)),
            dims=("time", "lon"),
            coords={"time": time_std, "lon": [-72, -71, -70, -69]},
        )
        # generate test start and end dates
        start_v = [[200, 200, np.nan, np.nan], [200, 200, 60, 60]]
        end_v = [[200, np.nan, 60, np.nan], [360, 60, 360, 80]]
        start_std = xr.DataArray(
            start_v,
            dims=("time", "lon"),
            coords={"time": [time_std[0], time_std[366]], "lon": data_std.lon},
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )
        end_std = xr.DataArray(
            end_v,
            dims=("time", "lon"),
            coords={"time": [time_std[0], time_std[366]], "lon": data_std.lon},
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )

        end_noleap = xr.DataArray(
            end_v,
            dims=("time", "lon"),
            coords={"time": [time_365[0], time_365[365]], "lon": data_std.lon},
            attrs={"calendar": "noleap", "is_dayofyear": 1},
        )

        out = generic.aggregate_between_dates(
            data_std, start_std, end_std, op="sum", freq="AS-JUL"
        )

        # expected output
        s = doy_to_days_since(start_std)
        e = doy_to_days_since(end_std)
        expected = e - s
        expected = xr.where(((s > e) | (s.isnull()) | (e.isnull())), np.nan, expected)

        np.testing.assert_allclose(out, expected)

        # check calendar conversion
        out_noleap = generic.aggregate_between_dates(
            data_std, start_std, end_noleap, op="sum", freq="AS-JUL"
        )

        np.testing.assert_allclose(out, out_noleap)
Example #2
def make_ensemble(files: List[Path],
                  percentiles: List[int],
                  average_dims: Optional[Tuple[str, ...]] = None) -> xr.Dataset:
    ensemble = ensembles.create_ensemble(files)
    # make sure we have data starting in 1950
    ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950))

    # If data is in day of year, percentiles won't make sense.
    # Convert to "days since" (base will be the time coordinate)
    for v in ensemble.data_vars:
        if ensemble[v].attrs.get('is_dayofyear', 0) == 1:
            ensemble[v] = doy_to_days_since(ensemble[v])

    if average_dims is not None:
        ensemble = ensemble.mean(dim=average_dims)

    ensemble_percentiles = ensembles.ensemble_percentiles(ensemble,
                                                          values=percentiles)

    # Day-of-year data converted above is converted back.
    for v in ensemble_percentiles.data_vars:
        units = ensemble_percentiles[v].attrs.get('units', '')
        if units.startswith('days after'):
            ensemble_percentiles[v] = days_since_to_doy(ensemble_percentiles[v])

    # Depending on the datasets, I've found that writing the netcdf could hang
    # if the dataset was not loaded explicitly beforehand... Not sure why.
    # The datasets should be pretty small when computing the ensembles, so this is
    # a best effort at working around what looks like a bug in either xclim or xarray.
    # The xarray documentation mentions: 'this method can be necessary when working
    # with many file objects on disk.'
    ensemble_percentiles.load()

    return ensemble_percentiles
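
A minimal usage sketch for the make_ensemble helper above; the input paths, percentile values, and output filename are hypothetical placeholders.

# Hypothetical call to make_ensemble (paths and values are illustrative).
from pathlib import Path

sim_files = sorted(Path("simulations").glob("*.nc"))  # hypothetical NetCDF inputs
ens_pct = make_ensemble(sim_files, percentiles=[10, 50, 90], average_dims=("lon",))
ens_pct.to_netcdf("ensemble_percentiles.nc")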
Example #3
    def test_time_length(self):
        # generate test DataArray
        time_data = date_range(
            "1991-01-01", "1993-12-31", freq="D", calendar="standard"
        )
        time_start = date_range(
            "1990-01-01", "1992-12-31", freq="D", calendar="standard"
        )
        time_end = date_range("1991-01-01", "1993-12-31", freq="D", calendar="standard")
        data = xr.DataArray(
            np.ones((time_data.size, 4)),
            dims=("time", "lon"),
            coords={"time": time_data, "lon": [-72, -71, -70, -69]},
        )
        # generate test start and end dates
        start_v = [[200, 200, np.nan, np.nan], [200, 200, 60, 60], [150, 100, 40, 10]]
        end_v = [[200, np.nan, 60, np.nan], [360, 60, 360, 80], [200, 200, 60, 50]]
        start = xr.DataArray(
            start_v,
            dims=("time", "lon"),
            coords={
                "time": [time_start[0], time_start[365], time_start[730]],
                "lon": data.lon,
            },
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )
        end = xr.DataArray(
            end_v,
            dims=("time", "lon"),
            coords={
                "time": [time_end[0], time_end[365], time_end[731]],
                "lon": data.lon,
            },
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )

        out = generic.aggregate_between_dates(data, start, end, op="sum", freq="YS")

        # expected output
        s = doy_to_days_since(start)
        e = doy_to_days_since(end)
        expected = e - s
        expected[1, 1] = np.nan

        np.testing.assert_allclose(out[0:2], expected)
        np.testing.assert_allclose(out[2], np.array([np.nan, np.nan, np.nan, np.nan]))
Example #4
def day_lengths(
    dates: xr.DataArray,
    lat: xr.DataArray,
    obliquity: float = -0.4091,
    summer_solstice: DayOfYearStr = "06-21",
    start_date: Optional[Union[xr.DataArray, DayOfYearStr]] = None,
    end_date: Optional[Union[xr.DataArray, DayOfYearStr]] = None,
    freq: str = "YS",
) -> xr.DataArray:
    r"""Day-lengths according to latitude, obliquity, and day of year.

    Parameters
    ----------
    dates: xr.DataArray
      Daily datetime data; day lengths are computed for each date in the time coordinate.
    lat: xr.DataArray
      Latitude coordinate.
    obliquity: float
      Obliquity of the ecliptic (radians). Default: -0.4091.
    summer_solstice: DayOfYearStr
      Date of the summer solstice in the northern hemisphere. Used for approximating solar Julian dates.
    start_date: xarray.DataArray or DayOfYearStr, optional
      Start date to consider for calculating mean day lengths. Default: None.
    end_date: xarray.DataArray or DayOfYearStr, optional
      End date to consider for calculating mean day lengths. Default: None.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray
      If start and end dates are provided, returns the total sum of daylight hours between those dates at the
      provided frequency. If not, returns the day length in hours for each individual day.

    Notes
    -----
    Daylight-hours are dependent on latitude, :math:`lat`, the Julian day (solar day) from the summer solstice in the
    Northern hemisphere, :math:`Jday`, and the axial tilt :math:`Axis`, therefore day-length at any latitude for a given
    date on Earth, :math:`dayLength_{lat_{Jday}}`, for a given year in days, :math:`Year`, can be approximated as
    follows:

    .. math::
        dayLength_{lat_{Jday}} = f({lat}, {Jday}) = \frac{\arccos(1-m_{lat_{Jday}})}{\pi} * 24

    Where:

    .. math::
        m_{lat_{Jday}} = f({lat}, {Jday}) = 1 - \tan({lat}) * \tan\left({Axis} * \cos\left[\frac{2 * \pi * {Jday}}{||{Year}||}\right]\right)

    The total sum of daylight hours for a given period between two days (:math:`{Jday} = 1` to :math:`N`) within a
    solar year then is:

    .. math::
        \sum({SeasonDayLength_{lat}}) = \sum_{Jday=1}^{N} dayLength_{lat_{Jday}}

    References
    ----------
    Modified day-length equations for Huglin heliothermal index published in Hall, A., & Jones, G. V. (2010). Spatial
    analysis of climate in winegrape-growing regions in Australia. Australian Journal of Grape and Wine Research, 16(3),
    389‑404. https://doi.org/10.1111/j.1755-0238.2010.00100.x

    Examples available from Glarner, 2006 (http://www.gandraxa.com/length_of_day.xml).
    """
    cal = get_calendar(dates)

    year_length = dates.time.copy(
        data=[days_in_year(x, calendar=cal) for x in dates.time.dt.year])

    julian_date_from_solstice = dates.time.copy(data=doy_to_days_since(
        dates.time.dt.dayofyear, start=summer_solstice, calendar=cal))

    m_lat_dayofyear = 1 - np.tan(np.radians(lat)) * np.tan(
        obliquity * np.cos((2 * np.pi * julian_date_from_solstice) / year_length)
    )

    day_length_hours = (np.arccos(1 - m_lat_dayofyear) / np.pi) * 24

    if start_date and end_date:
        return aggregate_between_dates(day_length_hours,
                                       start=start_date,
                                       end=end_date,
                                       op="sum",
                                       freq=freq)
    else:
        return day_length_hours
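
A minimal sketch of calling day_lengths above in both modes, assuming xclim's date_range helper is in scope as in the tests; the coordinate values are illustrative.

# Hypothetical usage of day_lengths (coordinate values are illustrative).
time = date_range("1992-01-01", "1992-12-31", freq="D", calendar="standard")
dates = xr.DataArray(time, dims=("time",), coords={"time": time})
lat = xr.DataArray([45.5, 46.5], dims=("lat",), coords={"lat": [45.5, 46.5]})

# Without bounds: day length in hours for each individual day.
hours_per_day = day_lengths(dates, lat)

# With day-of-year bounds: total daylight hours between the dates, per year.
season_hours = day_lengths(dates, lat, start_date="04-01", end_date="09-30", freq="YS")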
Example #5
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End dates (as day-of-year) for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Operator.
    freq : str, optional
      Resampling frequency. If None, the frequency is inferred from the time coordinates of `start` and `end`.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data between the start and end dates. Returns np.nan when the end date falls before the
      start date or when either date is missing.
    """
    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if isinstance(_bound, str):
            b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
            if not len(b_i):
                return None
            return (_group.time.isel(time=b_i[0]) -
                    _group.time.isel(time=0)).dt.days
        if _base_time in _bound.time:
            return _bound.sel(time=_base_time)
        return None

    if freq is None:
        frequencies = []
        for bound in [start, end]:
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually.")
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")

    if not isinstance(start, str):
        start = convert_calendar(start, cal)
        start.attrs["calendar"] = cal
        start = doy_to_days_since(start)
    if not isinstance(end, str):
        end = convert_calendar(end, cal)
        end.attrs["calendar"] = cal
        end = doy_to_days_since(end)

    out = list()
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        # convert bounds for this group
        if start_d is not None and end_d is not None:

            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())),
                np.nan, res)
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])
            out.append(res)
        else:
            # Build an array of the right shape, filled with nans, with the new base time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
            out.append(res)

    out = xr.concat(out, dim="time")
    return out
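
A minimal sketch of calling aggregate_between_dates with fixed day-of-year strings; the data construction mirrors the tests above and the bound values are illustrative.

# Hypothetical usage with string day-of-year bounds (values are illustrative).
time = date_range("1991-01-01", "1992-12-31", freq="D", calendar="standard")
data = xr.DataArray(np.ones(time.size), dims=("time",), coords={"time": time})

# Sum the daily ones from April 1 (inclusive) up to September 30 (exclusive) of each year.
out = aggregate_between_dates(data, start="04-01", end="09-30", op="sum", freq="YS")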
Example #6
    def test_frequency(self):
        # generate test DataArray
        time_data = date_range(
            "1991-01-01", "1992-05-31", freq="D", calendar="standard"
        )
        data = xr.DataArray(
            np.ones((time_data.size, 2)),
            dims=("time", "lon"),
            coords={"time": time_data, "lon": [-70, -69]},
        )
        # generate test start and end dates
        start_v = [[70, 100], [200, 200], [270, 300], [35, 35], [80, 80]]
        end_v = [[130, 70], [200, np.nan], [330, 270], [35, np.nan], [150, 150]]
        end_m_v = [[20, 20], [40, 40], [80, 80], [100, 100], [130, 130]]
        start = xr.DataArray(
            start_v,
            dims=("time", "lon"),
            coords={
                "time": [
                    time_data[59],
                    time_data[151],
                    time_data[243],
                    time_data[334],
                    time_data[425],
                ],
                "lon": data.lon,
            },
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )
        end = xr.DataArray(
            end_v,
            dims=("time", "lon"),
            coords={
                "time": [
                    time_data[59],
                    time_data[151],
                    time_data[243],
                    time_data[334],
                    time_data[425],
                ],
                "lon": data.lon,
            },
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )
        end_m = xr.DataArray(
            end_m_v,
            dims=("time", "lon"),
            coords={
                "time": [
                    time_data[0],
                    time_data[31],
                    time_data[59],
                    time_data[90],
                    time_data[120],
                ],
                "lon": data.lon,
            },
            attrs={"calendar": "standard", "is_dayofyear": 1},
        )

        out = generic.aggregate_between_dates(data, start, end, op="sum", freq="QS-DEC")

        # expected output
        s = doy_to_days_since(start)
        e = doy_to_days_since(end)
        expected = e - s
        expected = xr.where(expected < 0, np.nan, expected)

        np.testing.assert_allclose(out[0], np.array([np.nan, np.nan]))
        np.testing.assert_allclose(out[1:6], expected)

        with pytest.raises(ValueError):
            generic.aggregate_between_dates(data, start, end_m)
Example #7
def test_doy_to_days_since():
    # simple test
    time = date_range("2020-07-01", "2022-07-01", freq="AS-JUL")
    da = xr.DataArray(
        [190, 360, 3],
        dims=("time",),
        coords={"time": time},
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da)
    np.testing.assert_array_equal(out, [7, 178, 186])
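    # Worked arithmetic behind the expected values (Gregorian calendar):
    #   2020-07-01 is doy 183 (leap year), so 190 - 183 = 7
    #   2021-07-01 is doy 182, so 360 - 182 = 178
    #   doy 3 falls in the following period; 2022 has 365 days: (365 - 182) + 3 = 186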

    assert out.attrs["units"] == "days after 07-01"
    assert "is_dayofyear" not in out.attrs

    da2 = days_since_to_doy(out)
    xr.testing.assert_identical(da, da2)

    out = doy_to_days_since(da, start="07-01")
    np.testing.assert_array_equal(out, [7, 178, 186])

    # other calendar
    out = doy_to_days_since(da, calendar="noleap")
    assert out.attrs["calendar"] == "noleap"
    np.testing.assert_array_equal(out, [8, 178, 186])
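    # In the noleap calendar 2020-07-01 is doy 182, so the first value shifts: 190 - 182 = 8.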

    da2 = days_since_to_doy(out)  # calendar read from attribute
    da2.attrs.pop("calendar")  # drop for identicality
    da.attrs.pop("calendar")  # drop for identicality
    xr.testing.assert_identical(da, da2)

    # with start
    time = date_range("2020-12-31", "2022-12-31", freq="Y")
    da = xr.DataArray(
        [190, 360, 3],
        dims=("time",),
        coords={"time": time},
        name="da",
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da, start="01-02")
    np.testing.assert_array_equal(out, [188, 358, 1])

    da2 = days_since_to_doy(out)  # start read from attribute
    assert da2.name == da.name
    xr.testing.assert_identical(da, da2)

    # finer freq
    time = date_range("2020-01-01", "2020-03-01", freq="MS")
    da = xr.DataArray(
        [15, 33, 66],
        dims=("time",),
        coords={"time": time},
        name="da",
        attrs={"is_dayofyear": 1, "calendar": "default"},
    )

    out = doy_to_days_since(da)
    assert out.attrs["units"] == "days after time coordinate"
    np.testing.assert_array_equal(out, [14, 1, 5])

    da2 = days_since_to_doy(out)  # start read from attribute
    xr.testing.assert_identical(da, da2)