Ejemplo n.º 1
0
def test_stack_by_init_date(example_da_timeseries, offset, stride,
                            n_lead_steps, data_object):
    """Compare ``stack_by_init_date`` against a plain NumPy reference.

    The initial dates are picked from the time axis either at a regular
    ``stride`` starting at ``offset`` or, when ``stride == "irregular"``, at
    random increments. The stacked values, the init coordinate and the time
    at the first lead are then checked.
    """
    def _reference_stack(series, starts, n_steps):
        """Build the expected (init, lead) array, NaN where data runs out."""
        expected = np.full((len(starts), n_steps), np.nan)
        for row, start in enumerate(starts):
            window = series[start:start + n_steps]
            expected[row, :len(window)] = window
        return expected

    as_dataset = data_object == "Dataset"

    data = example_da_timeseries
    if as_dataset:
        data = data.to_dataset(name="var")

    if stride == "irregular":
        # Random positive steps accumulated from ``offset``; positions past
        # the end of the time axis are discarded.
        steps = np.random.randint(1, 20, size=1000)
        indexes = np.concatenate(([offset], steps)).cumsum()
        indexes = indexes[indexes < data.sizes[pytest.TIME_DIM]]
    else:
        indexes = range(offset, data.sizes[pytest.TIME_DIM], stride)

    init_dates = data[pytest.TIME_DIM][indexes]
    res = stack_by_init_date(
        data,
        init_dates,
        n_lead_steps,
        init_dim=pytest.INIT_DIM,
        lead_dim=pytest.LEAD_DIM,
    )

    if as_dataset:
        res = res["var"]
        data = data["var"]

    # Stacked values must match the NumPy reference.
    npt.assert_allclose(res, _reference_stack(data, indexes, n_lead_steps))

    # The init coordinate must hold the requested init dates.
    expected_inits = xr.CFTimeIndex(init_dates.values).asi8
    npt.assert_allclose(expected_inits, res.get_index(pytest.INIT_DIM).asi8)

    # The time at the first lead must coincide with the init dates.
    lead_zero_times = res[pytest.TIME_DIM].isel({pytest.LEAD_DIM: 0}).values
    npt.assert_allclose(
        xr.CFTimeIndex(init_dates.values).asi8,
        xr.CFTimeIndex(lead_zero_times).asi8,
    )
Ejemplo n.º 2
0
def convert_time_index(xobj,
                       time_string,
                       kind,
                       calendar=HINDCAST_CALENDAR_STR):
    """Convert the ``time_string`` index of ``xobj`` to an ``xr.CFTimeIndex``.

    Args:
        xobj (xarray object): Dataset or DataArray with a time dimension to convert.
        time_string (str): Name of time dimension.
        kind (str): Kind of object, used only in the error message.
        calendar (str): Name of the ``cftime`` date class used for the new
            index; it is looked up with ``getattr(cftime, calendar)``.

    Returns:
        A copy of ``xobj``. An index that already is an ``xr.CFTimeIndex`` is
        left unchanged. A ``pd.DatetimeIndex`` is converted date by date
        (year, month, day; the time of day is dropped). A signed-integer or
        float index is interpreted as years and mapped to January 1st of each
        year; in that case ``lead`` units are set to ``'years'`` when ``lead``
        is a dimension.

    Raises:
        ValueError: If the index is not an ``xr.CFTimeIndex``, a
            ``pd.DatetimeIndex`` or an integer/float index.
    """
    xobj = xobj.copy()  # Ensures that main object index is not overwritten.
    time_index = xobj[time_string].to_index()

    if isinstance(time_index, xr.CFTimeIndex):
        return xobj

    if isinstance(time_index, pd.DatetimeIndex):
        # Keep the calendar date only.
        date_parts = [(t.year, t.month, t.day) for t in time_index]

    # Integer or float index: assume annual and convert accordingly. The
    # dtype kind is checked instead of ``pd.Int64Index``/``pd.Float64Index``
    # because those classes no longer exist in pandas >= 2.0.
    elif time_index.dtype.kind in ('i', 'f'):
        warnings.warn(
            'Assuming annual resolution due to numeric inits. '
            'Change init to a datetime if it is another resolution.')
        # TODO: What about decimal time? E.g. someone has 1955.5 or something?
        # Integer tuples (rather than '-'-joined strings that are split
        # again) keep negative years intact.
        date_parts = [(int(t), 1, 1) for t in time_index]
        if 'lead' in xobj.dims:
            # Probably the only case we can assume lead units, since `lead` does not
            # give us any information on this.
            xobj['lead'].attrs['units'] = 'years'

    else:
        raise ValueError(f'Your {kind} object must be pd.Float64Index, '
                         'pd.Int64Index, xr.CFTimeIndex or '
                         'pd.DatetimeIndex.')

    date_class = getattr(cftime, calendar)
    cftime_dates = [
        date_class(int(y), int(m), int(d)) for (y, m, d) in date_parts
    ]
    xobj[time_string] = xr.CFTimeIndex(cftime_dates)

    return xobj
Ejemplo n.º 3
0
def test_round(rounding_index, date_type):
    """Check ``rounding_index.round("S")`` against hand-written dates."""
    expected_fields = (
        (1, 1, 1, 2, 0, 0, 0),
        (1, 1, 1, 3, 0, 2, 0),
        (1, 1, 1, 7, 0, 6, 0),
    )
    expected = xr.CFTimeIndex(
        [date_type(*fields) for fields in expected_fields])

    rounded = rounding_index.round("S")
    assert rounded.equals(expected)
Ejemplo n.º 4
0
def test_floor(rounding_index, date_type):
    """Check ``rounding_index.floor("S")`` against hand-written dates."""
    expected_fields = (
        (1, 1, 1, 1, 59, 59, 0),
        (1, 1, 1, 3, 0, 1, 0),
        (1, 1, 1, 7, 0, 6, 0),
    )
    expected = xr.CFTimeIndex(
        [date_type(*fields) for fields in expected_fields])

    floored = rounding_index.floor("S")
    assert floored.equals(expected)
Ejemplo n.º 5
0
def rounding_index(date_type):
    """Return a three-date ``xr.CFTimeIndex`` with non-zero microseconds.

    Each date is built as ``date_type(year, month, day, hour, minute,
    second, microsecond)``.
    """
    date_fields = (
        (1, 1, 1, 1, 59, 59, 999512),
        (1, 1, 1, 3, 0, 1, 500001),
        (1, 1, 1, 7, 0, 6, 499999),
    )
    dates = [date_type(*fields) for fields in date_fields]
    return xr.CFTimeIndex(dates)
Ejemplo n.º 6
0
def test_asi8_distant_date():
    """Check that ``asi8`` is exact for a date far from the epoch."""
    import cftime

    distant_date = cftime.DatetimeProlepticGregorian(
        10731, 4, 22, 3, 25, 45, 123456)
    index = xr.CFTimeIndex([distant_date])

    # Expected value written as days + seconds + microseconds, all expressed
    # in microseconds; 12345 s corresponds to the 03:25:45 time of day.
    microseconds_per_second = 1000000
    day_count = 400 * 8000
    seconds_into_day = 12345
    expected = np.array([
        microseconds_per_second * 86400 * day_count
        + seconds_into_day * microseconds_per_second
        + 123456
    ])

    np.testing.assert_array_equal(index.asi8, expected)
Ejemplo n.º 7
0
 def restore_dataset(self, ds):
     """Write the original time values back onto ``ds`` and return it.

     The values stored under ``self.orig_time_coord_name`` are copied into
     the ``self.time_coord_name`` variable (decoded to an
     ``xr.CFTimeIndex`` first when ``self.orig_time_coord_decoded`` is
     set), and the ``self.orig_time_coord_name`` variable is dropped.

     Raises:
         ValueError: If ``self._time_computed`` is false.
     """
     if not self._time_computed:
         raise ValueError("time was not computed; cannot restore dataset")

     original_name = self.orig_time_coord_name
     restored = ds[original_name].values

     if self.orig_time_coord_decoded:
         # The stored values are numeric; decode them with the saved
         # units and calendar.
         decoded = cftime.num2date(
             restored,
             units=self.time_attrs["units"],
             calendar=self.time_attrs["calendar"],
         )
         restored = xr.CFTimeIndex(decoded)

     ds[self.time_coord_name].values = restored
     return ds.drop(original_name)
Ejemplo n.º 8
0
    def get_time_decoded(self, midpoint=True):
        """Return the time coordinate decoded to cftime dates.

        Args:
            midpoint (bool): If true, use the mean of ``self.time_bound``
                over ``self.tb_dim`` as the time value instead of
                ``self.time``.

        Returns:
            ``self.time`` itself when ``midpoint`` is false, time is already
            decoded and there is no ``year_offset``; otherwise a copy of
            ``self.time`` whose data is an ``xr.CFTimeIndex``.

        Raises:
            ValueError: If ``midpoint`` is requested but ``self.time_bound``
                is ``None``.
        """
        # to compute a time midpoint, we need a time_bound variable
        if midpoint and self.time_bound is None:
            raise ValueError('cannot compute time midpoint w/o time bounds')

        if midpoint:
            time_data = self.time_bound.mean(self.tb_dim)

        elif self.isdecoded(self.time):
            # if time has already been decoded and there's no year_offset,
            # just return the time as is
            if self.year_offset is None:
                return self.time

            # un-decode time so that the year_offset can be applied
            time_data = self.get_time_undecoded()

        else:
            # time has not been decoded
            time_data = self.time

        if self.year_offset is not None:
            # Add out of place: in the undecoded branch ``time_data`` is
            # ``self.time`` itself, and ``+=`` would shift the stored
            # coordinate on every call.
            time_data = time_data + cftime.date2num(
                datetime(int(self.year_offset), 1, 1),
                units=self.time_attrs['units'],
                calendar=self.time_attrs['calendar'],
            )

        time_out = self.time.copy()
        time_out.data = xr.CFTimeIndex(
            cftime.num2date(
                time_data,
                units=self.time_attrs['units'],
                calendar=self.time_attrs['calendar'],
                only_use_cftime_datetimes=True,
            )
        )
        return time_out
Ejemplo n.º 9
0
    def get_time_decoded(self, midpoint=True):
        """Return the time coordinate decoded to cftime dates.

        Args:
            midpoint (bool): If true, use the mean of ``self.time_bound``
                over ``self.tb_dim`` as the time value instead of
                ``self.time``.

        Returns:
            ``self.time`` itself when ``midpoint`` is false, time already has
            object dtype (i.e. is decoded) and there is no ``year_offset``;
            otherwise a new ``xr.DataArray`` built from ``self.time`` whose
            values are an ``xr.CFTimeIndex``.

        Raises:
            ValueError: If ``midpoint`` is requested but ``self.time_bound``
                is ``None``.
        """
        # to compute a time midpoint, we need a time_bound variable
        if midpoint and self.time_bound is None:
            raise ValueError("cannot compute time midpoint w/o time bounds")

        if midpoint:
            time_values = self.time_bound.mean(self.tb_dim)

        elif self.time.dtype == np.dtype("O"):
            # if time has already been decoded and there's no year_offset,
            # just return the time as is (``time_values`` is not bound yet
            # on this path, so it cannot be what is returned)
            if self.year_offset is None:
                return self.time

            # if we need to un-decode time to apply the year_offset,
            # make sure there are units to do so with
            time_values = self.get_time_undecoded()

        else:
            # time has not been decoded
            time_values = self.time

        if self.year_offset is not None:
            # Add out of place: in the undecoded branch ``time_values`` is
            # ``self.time`` itself, and ``+=`` would shift the stored
            # coordinate on every call.
            time_values = time_values + cftime.date2num(
                datetime(int(self.year_offset), 1, 1),
                units=self.time_attrs["units"],
                calendar=self.time_attrs["calendar"],
            )

        time_out = xr.DataArray(self.time)
        time_out.values = xr.CFTimeIndex(
            cftime.num2date(time_values,
                            units=self.time_attrs["units"],
                            calendar=self.time_attrs["calendar"]))
        return time_out
Ejemplo n.º 10
0
def test_asi8(date_type):
    """Check ``asi8`` for 1970-01-01 and the following day.

    The expected values are 0 and one day expressed in microseconds.
    """
    dates = [date_type(1970, 1, day) for day in (1, 2)]
    index = xr.CFTimeIndex(dates)

    microseconds_per_day = 1000000 * 86400
    expected = microseconds_per_day * np.array([0, 1])

    np.testing.assert_array_equal(index.asi8, expected)
Ejemplo n.º 11
0
def convert_time_index(xobj,
                       time_string,
                       kind="object",
                       calendar=HINDCAST_CALENDAR_STR):
    """Convert incoming time index to a :py:class:`~xarray.CFTimeIndex`.

    Args:
        xobj (xarray.Dataset): with a time dimension to convert.
        time_string (str): Name of time dimension.
        kind (str): Kind of object for error message.
        calendar (str): Name of the ``cftime`` date class used for the new
            index; it is looked up with ``getattr(cftime, calendar)``.

    Returns:
        A copy of ``xobj``. An index that already is an ``xr.CFTimeIndex`` is
        left unchanged. A ``pd.DatetimeIndex`` is converted element by element
        down to the second. Any other ``pd.Index`` is interpreted as years
        (via ``int``) and mapped to January 1st, 00:00:00 of each year; in
        that case ``lead`` units are set to ``"years"`` when ``lead`` is a
        dimension. When ``time_string`` is ``"time"``, default ``long_name``
        and ``standard_name`` attributes are added after a conversion.

    Raises:
        ValueError: If the index is not a ``pd.Index`` at all. Values that
            ``int`` cannot convert raise whatever ``int`` raises.
    """
    xobj = xobj.copy()  # Ensures that main object index is not overwritten.
    time_index = xobj[time_string].to_index()

    if isinstance(time_index, xr.CFTimeIndex):
        return xobj

    if isinstance(time_index, pd.DatetimeIndex):
        # Read the date/time fields directly instead of formatting each
        # timestamp to a string and splitting it again.
        date_parts = [(t.year, t.month, t.day, t.hour, t.minute, t.second)
                      for t in time_index]

    # If pd.Index, assume annual and convert accordingly.
    elif isinstance(time_index, pd.Index):
        if OPTIONS["warn_for_init_coords_int_to_annual"]:
            warnings.warn(
                "Assuming annual resolution starting Jan 1st due to numeric inits. "
                "Please change ``init`` to a datetime if it is another resolution. "
                "We recommend using xr.CFTimeIndex as ``init``, see "
                "https://climpred.readthedocs.io/en/stable/setting-up-data.html."
            )
        # TODO: What about decimal time? E.g. someone has 1955.5 or something?
        # hard to maintain a clear rule below seasonality
        # Integer tuples (rather than '-'-joined strings that are split
        # again) keep negative years intact.
        date_parts = [(int(t), 1, 1, 0, 0, 0) for t in time_index]
        if "lead" in xobj.dims:
            # Probably the only case we can assume lead units, since `lead` does not
            # give us any information on this.
            xobj["lead"].attrs["units"] = "years"

    else:
        # Defensive only: the ``pd.Index`` check above matches every pandas
        # index, so this is reached only if ``to_index()`` returns something
        # else.
        raise ValueError(f"Your {kind} object must be pd.Float64Index, "
                         "pd.Int64Index, xr.CFTimeIndex or "
                         "pd.DatetimeIndex.")

    date_class = getattr(cftime, calendar)
    cftime_dates = [
        date_class(int(y), int(m), int(d), int(H), int(M), int(S))
        for (y, m, d, H, M, S) in date_parts
    ]
    xobj[time_string] = xr.CFTimeIndex(cftime_dates)
    if time_string == "time":
        xobj["time"].attrs.setdefault("long_name", "time")
        xobj["time"].attrs.setdefault("standard_name", "time")

    return xobj
Ejemplo n.º 12
0
                                    np.datetime64('2000-01'))
# The same date (year 2000) expressed with three non-cftime types, keyed by a
# label naming the type.
_NON_CFTIME_DATES = {
    'datetime.datetime': datetime.datetime(2000, 1, 1),
    'np.datetime64': np.datetime64('2000-01-01'),
    'str': '2000'
}
# Register one entry per non-cftime date in _DATETIME_CONVERT_TESTS. Each
# entry is a 3-tuple starting with _DATETIME_INDEX and the date; the third
# item is the date itself for strings and np.datetime64('2000-01') otherwise
# (presumably the expected conversion result -- confirm against the test that
# consumes this table).
for date_label, date in _NON_CFTIME_DATES.items():
    key = 'DatetimeIndex-{}'.format(date_label)
    if isinstance(date, str):
        _DATETIME_CONVERT_TESTS[key] = (_DATETIME_INDEX, date, date)
    else:
        _DATETIME_CONVERT_TESTS[key] = (_DATETIME_INDEX, date,
                                        np.datetime64('2000-01'))

# One single-element CFTimeIndex (date 0001-01-01) per entry of
# _CFTIME_DATE_TYPES, labelled 'CFTimeIndex[<key>]'.
_CFTIME_INDEXES = {
    'CFTimeIndex[{}]'.format(key): xr.CFTimeIndex([value(1, 1, 1)])
    for key, value in _CFTIME_DATE_TYPES.items()
}
# Every (index, date type) combination: the entry pairs the index with a date
# of the (possibly different) date type and with the same date built from the
# index's own date type.
_CFTIME_CONVERT_TESTS = {}
for ((index_label, index), (date_label,
                            date_type)) in product(_CFTIME_INDEXES.items(),
                                                   _CFTIME_DATE_TYPES.items()):
    key = '{}-{}'.format(index_label, date_label)
    _CFTIME_CONVERT_TESTS[key] = (index, date_type(1, 1, 1),
                                  index.date_type(1, 1, 1))
# Year-1 counterparts of _NON_CFTIME_DATES.
_NON_CFTIME_DATES_0001 = {
    'datetime.datetime': datetime.datetime(1, 1, 1),
    'np.datetime64': np.datetime64('0001-01-01'),
    'str': '0001'
}
for ((idx_label, index), (date_label,
Ejemplo n.º 13
0
def date_range(start=None, end=None, periods=None, freq='D', tz=None,
               normalize=False, name=None, closed=None, calendar='standard',
               **kwargs):
    ''' Return a fixed frequency datetime index, with day (calendar) as the
    default frequency

    Parameters
    ----------
    start : string or datetime-like, default None
        Left bound for generating dates
    end : string or datetime-like, default None
        Right bound for generating dates
    periods : integer or None, default None
        If None, must specify start and end
    freq : string or DateOffset, default 'D' (calendar daily)
        Frequency strings can have multiples, e.g. '5H'
    tz : string or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Hong_Kong
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range
    name : str, default None
        Name of the resulting index
    closed : string or None, default None
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None). Only forwarded to pandas
        when it is not None.
    calendar : string
        Describes the calendar used in the time calculations. Default is the
        standard calendar (with leap years)

    Notes
    -----
    2 of start, end, or periods must be specified. For calendars other than
    'standard', 'gregorian' and 'proleptic_gregorian', only the combination
    of start and end (without periods) is implemented.
    To learn more about the frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    rng : DatetimeIndex

    Raises
    ------
    NotImplementedError
        If a non-standard calendar is requested without both start and end,
        or together with periods.
    '''
    # 'propoleptic_gregorian' is a historical misspelling, still accepted so
    # that existing callers keep working.
    pandas_calendars = ('standard', 'gregorian', 'proleptic_gregorian',
                        'propoleptic_gregorian')
    if calendar in pandas_calendars:
        # Forward ``closed`` only when the caller set it: recent pandas
        # versions no longer accept that keyword at all.
        if closed is not None:
            kwargs['closed'] = closed
        return pd.date_range(start=start, end=end, periods=periods,
                             freq=freq, tz=tz, normalize=normalize, name=name,
                             **kwargs)

    # start and end are given
    if (start is not None) and (end is not None) and (periods is None):
        steps, units = decode_freq(freq)
        start_num, end_num = date2num(
            pd.to_datetime([start, end]).to_pydatetime(),
            units, calendar=calendar)
        periods = int((end_num - start_num) / steps) + 1

        # np.longdouble is the widest float available on the platform; it is
        # the same type as np.float128 wherever the latter exists.
        times = num2date(
            np.linspace(start_num, end_num, periods,
                        endpoint=True,
                        dtype=np.longdouble), units, calendar)
        index = xr.CFTimeIndex(times).to_datetimeindex()
        return index

    raise NotImplementedError(
        'Specified arguments are not valid for this calendar')