def test_determintic_term_equiv(index):
    """Check that terms built from ``index`` match a ``RangeIndex(0, 200)`` baseline.

    Both processes use a constant and a second-order trend.  The in-sample
    values, a 37-step out-of-sample forecast, and several explicit ranges are
    compared element-wise.
    """
    settings = {"constant": True, "order": 2}
    reference = DeterministicProcess(pd.RangeIndex(0, 200), **settings)
    candidate = DeterministicProcess(index, **settings)

    np.testing.assert_array_equal(
        reference.in_sample(), candidate.in_sample()
    )
    np.testing.assert_array_equal(
        reference.out_of_sample(37), candidate.out_of_sample(37)
    )
    # Ranges past the sample end, inside the sample, and straddling the end.
    for start, stop in ((200, 237), (50, 150), (50, 250)):
        np.testing.assert_array_equal(
            reference.range(start, stop), candidate.range(start, stop)
        )
def test_drop_two_consants(time_index):
    """Check that ``drop=True`` leaves two columns when a constant is given twice.

    The constant is requested both directly and through a ``TimeTrend``
    additional term; the result must equal the process built from the
    additional term alone.
    """
    trend = TimeTrend(constant=True, order=1)

    with_explicit_const = DeterministicProcess(
        time_index, constant=True, additional_terms=[trend], drop=True
    )
    n_columns = with_explicit_const.in_sample().shape[1]
    assert n_columns == 2

    trend_only = DeterministicProcess(
        time_index, additional_terms=[trend], drop=True
    )
    pd.testing.assert_frame_equal(
        with_explicit_const.in_sample(), trend_only.in_sample()
    )
def test_range_casting():
    """Check that ``np.arange(120)`` and ``pd.RangeIndex(0, 120)`` give equal terms.

    Both the in-sample frame and the frame for the range 100..150 must be
    equal between the two processes.
    """
    options = {"constant": True, "order": 1, "seasonal": True, "period": 12}

    from_array = DeterministicProcess(np.arange(120), **options)
    from_range_index = DeterministicProcess(pd.RangeIndex(0, 120), **options)

    pd.testing.assert_frame_equal(
        from_array.in_sample(), from_range_index.in_sample()
    )
    pd.testing.assert_frame_equal(
        from_array.range(100, 150), from_range_index.range(100, 150)
    )
def test_additional_terms(time_index):
    """Check ``additional_terms`` equivalence and duplicate-term rejection.

    A ``TimeTrend`` passed through ``additional_terms`` must give the same
    in-sample frame as ``constant=True, order=1``.  Supplying the same term
    twice, or alongside the equivalent built-in settings, must raise
    ``ValueError``.
    """
    extra = [TimeTrend(True, order=1)]

    via_terms = DeterministicProcess(time_index, additional_terms=extra)
    via_flags = DeterministicProcess(time_index, constant=True, order=1)
    pd.testing.assert_frame_equal(via_terms.in_sample(), via_flags.in_sample())

    expected_message = "One or more terms in additional_terms"
    rejected_configs = (
        # The same term listed twice.
        {"additional_terms": extra + extra},
        # The term duplicates the built-in constant and trend.
        {"constant": True, "order": 1, "additional_terms": extra},
    )
    for config in rejected_configs:
        with pytest.raises(ValueError, match=expected_message):
            DeterministicProcess(time_index, **config)
def check(self, dp, X):
    """Validate a submitted ``DeterministicProcess`` and its feature matrix.

    Parameters
    ----------
    dp
        The submitted process.  Its private attributes (``_index``,
        ``_constant``, ``_order``, ``_seasonal``, ``_additional_terms``,
        ``_drop``) are inspected to produce targeted feedback.
    X
        The submitted feature matrix, compared against the reference
        process's ``in_sample()`` output.

    Raises
    ------
    AssertionError
        When any inspected setting of ``dp`` differs from the expected
        configuration.  The final comparison of ``X`` is delegated to
        ``assert_equal``, which is defined outside this block.
    """
    import pandas as pd
    from statsmodels.tsa.deterministic import (
        CalendarFourier,
        DeterministicProcess,
    )

    y = load_average_sales()['2017']
    fourier = CalendarFourier(freq='M', order=4)
    # The reference solution gets its own name: rebinding ``dp`` here would
    # make every check below inspect the reference instead of the submission.
    dp_true = DeterministicProcess(
        index=y.index,
        constant=True,
        order=1,
        seasonal=True,
        additional_terms=[fourier],
        drop=True,
    )
    X_true = dp_true.in_sample()

    assert all(
        dp._index == y.index
    ), f"`index` argument to `DeterministicProcess` should be `y.index`. You gave {dp._index}."
    assert dp._constant, f"`constant` argument to `DeterministicProcess` should be `True`. You gave {dp._constant}."
    assert dp._order == 1, f"`order` argument to `DeterministicProcess` should be `1`. You gave {dp._order}."
    assert dp._seasonal, f"`seasonal` argument to `DeterministicProcess` should be `True`. You gave {dp._seasonal}."
    # Check the length first so that indexing element 0 below is safe.
    assert len(
        dp._additional_terms
    ) == 1, f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
    assert isinstance(
        dp._additional_terms[0], CalendarFourier
    ), f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
    assert dp._additional_terms[
        0]._order == 4, f"`order` argument to `CalendarFourier` should be `4`. You gave {dp._additional_terms[0]._order}."
    assert isinstance(
        dp._additional_terms[0]._freq, pd.offsets.MonthEnd
    ), f"`freq` argument to `CalendarFourier` should be `'M'`."
    # The message now names the `drop` argument that is actually checked.
    assert dp._drop, f"`drop` argument to `DeterministicProcess` should be `True`. You gave {dp._drop}."
    assert_equal(X, X_true, 'X')
def test_drop():
    """Check ``drop=True`` with a ``DummyTerm`` additional term.

    The in-sample and 37-step out-of-sample frames must each have four
    columns with identical labels, and each expected name fragment must
    appear in exactly one column label.
    """
    term = DummyTerm()
    str(term)  # Exercise string conversion; the result is discarded.
    assert term != TimeTrend()

    process = DeterministicProcess(
        pd.RangeIndex(0, 200), additional_terms=[term], drop=True
    )

    inside = process.in_sample()
    assert inside.shape == (200, 4)

    forecast = process.out_of_sample(37)
    assert forecast.shape == (37, 4)
    assert list(forecast.columns) == list(inside.columns)

    for fragment in ("const", "trend", "dummy", "normal"):
        # Iterating a DataFrame yields its column labels.
        hits = [label for label in forecast if fragment in label]
        assert len(hits) == 1
def test_deterministic_process(
    time_index, constant, order, seasonal, fourier, period, drop
):
    """Smoke-test ``DeterministicProcess`` for one combination of settings.

    All arguments are supplied from outside this block.  Combinations that
    request both ``seasonal`` and ``fourier`` terms are not exercised.
    """
    both_requested = seasonal and fourier
    if both_requested:
        return

    options = {
        "constant": constant,
        "order": order,
        "seasonal": seasonal,
        "fourier": fourier,
        "period": period,
        "drop": drop,
    }
    process = DeterministicProcess(time_index, **options)

    in_sample_terms = process.in_sample()
    pd.testing.assert_index_equal(in_sample_terms.index, time_index)

    forecast_terms = process.out_of_sample(23)
    assert isinstance(forecast_terms, pd.DataFrame)
# The process requires an index, which is the index of the full-sample (or # in-sample). # # First, we initialize a deterministic process with a constant, a linear # time trend, and a 5-period seasonal term. The `in_sample` method returns # the full set of values that match the index. from statsmodels.tsa.deterministic import DeterministicProcess index = pd.RangeIndex(0, 100) det_proc = DeterministicProcess(index, constant=True, order=1, seasonal=True, period=5) det_proc.in_sample() # The `out_of_sample` returns the next `steps` values after the end of the # in-sample. det_proc.out_of_sample(15) # `range(start, stop)` can also be used to produce the deterministic terms # over any range including in- and out-of-sample. # # ### Notes # # * When the index is a pandas `DatetimeIndex` or a `PeriodIndex`, then # `start` and `stop` can be date-like (strings, e.g., "2020-06-01", or # Timestamp) or integers. # * `stop` is always included in the range. While this is not very