def test_autoreg_append_deterministic(append_data):
    """Check AutoReg.append when the model uses a DeterministicProcess.

    Three scenarios are covered:

    * pandas input, ``refit=True``: parameters must match a model fitted
      directly on the combined sample.
    * NumPy input, append without passing ``refit``: parameters must equal
      those of the original fit.
    * pandas input with exogenous regressors, ``refit=True``: parameters must
      match a direct fit on the combined sample.
    """
    endog, endog_oos, endog_all = (
        append_data.y,
        append_data.y_oos,
        append_data.y_both,
    )
    exog, exog_oos, exog_all = (
        append_data.x,
        append_data.x_oos,
        append_data.x_both,
    )

    det = DeterministicProcess(
        endog.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )
    # Settings shared by every model built in this test.
    common = {"lags": 3, "trend": "n"}

    # pandas input, refit on the extended sample.
    fitted = AutoReg(endog, deterministic=det, **common).fit()
    appended = fitted.append(endog_oos, refit=True)
    det_all = det.apply(endog_all.index)
    direct = AutoReg(endog_all, deterministic=det_all, **common).fit()
    assert_allclose(appended.params, direct.params)

    # NumPy input, append without requesting a refit.
    fitted_np = AutoReg(np.asarray(endog), deterministic=det, **common).fit()
    appended_np = fitted_np.append(np.asarray(endog_oos))
    assert_allclose(fitted_np.params, appended_np.params)

    # pandas input with exogenous regressors, refit on the extended sample.
    fitted = AutoReg(endog, exog=exog, deterministic=det, **common).fit()
    appended = fitted.append(endog_oos, exog=exog_oos, refit=True)
    det_all = det.apply(endog_all.index)
    direct = AutoReg(
        endog_all, exog=exog_all, deterministic=det_all, **common
    ).fit()
    assert_allclose(appended.params, direct.params)
def test_drop_two_consants(time_index):
    """A constant requested twice collapses to a single column with drop=True.

    The process built with both ``constant=True`` and a constant-bearing
    ``TimeTrend`` must produce two columns and be identical to the process
    built from the ``TimeTrend`` alone.
    """
    trend = TimeTrend(constant=True, order=1)

    with_extra_const = DeterministicProcess(
        time_index,
        constant=True,
        additional_terms=[trend],
        drop=True,
    )
    n_columns = with_extra_const.in_sample().shape[1]
    assert n_columns == 2

    trend_only = DeterministicProcess(
        time_index,
        additional_terms=[trend],
        drop=True,
    )
    pd.testing.assert_frame_equal(
        with_extra_const.in_sample(), trend_only.in_sample()
    )
def test_deterministic(reset_randomstate):
    """AutoReg with a DeterministicProcess matches the built-in trend/seasonal.

    Also checks that combining ``deterministic`` with a non-"n" ``trend``
    emits a RuntimeWarning.
    """
    series = pd.Series(np.random.normal(size=200))
    det = DeterministicProcess(
        series.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )

    # Model whose trend and seasonality come from the deterministic process.
    via_det = AutoReg(
        series, trend="n", seasonal=False, lags=2, deterministic=det
    ).fit()
    # Equivalent model using AutoReg's own trend and seasonal options.
    via_builtin = AutoReg(
        series, trend="ct", seasonal=True, lags=2, period=12
    ).fit()

    params_det = np.asarray(via_det.params)
    params_builtin = np.asarray(via_builtin.params)
    assert_almost_equal(params_det, params_builtin)

    expected_msg = "When using deterministic, trend"
    with pytest.warns(RuntimeWarning, match=expected_msg):
        AutoReg(series, trend="ct", seasonal=False, lags=2, deterministic=det)
def test_drop():
    """DeterministicProcess with drop=True keeps four columns from DummyTerm.

    In-sample and out-of-sample frames must share the same columns, and each
    of the expected name fragments must appear in exactly one column.
    """
    n_obs, n_forecast, n_cols = 200, 37, 4
    idx = pd.RangeIndex(0, n_obs)

    term = DummyTerm()
    str(term)  # smoke check: string conversion must not raise
    assert term != TimeTrend()

    process = DeterministicProcess(idx, additional_terms=[term], drop=True)

    fitted = process.in_sample()
    assert fitted.shape == (n_obs, n_cols)

    forecast = process.out_of_sample(n_forecast)
    assert forecast.shape == (n_forecast, n_cols)
    assert list(forecast.columns) == list(fitted.columns)

    # Iterating a DataFrame yields its column labels.
    for fragment in ("const", "trend", "dummy", "normal"):
        n_matches = sum(fragment in label for label in forecast)
        assert n_matches == 1
def test_additional_terms(time_index):
    """additional_terms reproduces constructor terms and rejects duplicates.

    A ``TimeTrend`` passed through ``additional_terms`` must give the same
    in-sample frame as ``constant=True, order=1``. Supplying the same term
    twice, or alongside the equivalent constructor options, must raise
    ValueError.
    """
    trend_terms = [TimeTrend(True, order=1)]

    from_terms = DeterministicProcess(time_index, additional_terms=trend_terms)
    from_kwargs = DeterministicProcess(time_index, constant=True, order=1)
    pd.testing.assert_frame_equal(
        from_terms.in_sample(), from_kwargs.in_sample()
    )

    duplicate_msg = "One or more terms in additional_terms"
    invalid_configs = (
        # The same term listed twice.
        {"additional_terms": trend_terms + trend_terms},
        # The term duplicates what the constructor options already create.
        {"constant": True, "order": 1, "additional_terms": trend_terms},
    )
    for kwargs in invalid_configs:
        with pytest.raises(ValueError, match=duplicate_msg):
            DeterministicProcess(time_index, **kwargs)
# This is equivalent to using the integer values 58 and 70.

det_proc.range(58, 70)

# ## Advanced Construction
#
# Deterministic processes with features not supported directly through the
# constructor can be created using `additional_terms` which accepts a list
# of `DeterministicTerm`. Here we create a deterministic process with two
# seasonal components: day-of-week with a 7 day period and an annual cycle
# captured through a Fourier component with a period of 365.25 days.

from statsmodels.tsa.deterministic import Fourier, Seasonality, TimeTrend

index = pd.period_range("2020-03-01", freq="D", periods=2 * 365)
tt = TimeTrend(constant=True)
four = Fourier(period=365.25, order=2)
seas = Seasonality(period=7)
det_proc = DeterministicProcess(index, additional_terms=[tt, seas, four])
det_proc.in_sample().head(28)

# ## Custom Deterministic Terms
#
# The `DeterministicTerm` Abstract Base Class is designed to be subclassed
# to help users write custom deterministic terms. We next show two
# examples. The first is a broken time trend that allows a break after a
# fixed number of periods. The second is a "trick" deterministic term that
# allows exogenous data, which is not really a deterministic process, to be
# treated as if it were deterministic. This lets us simplify gathering the
# terms needed for forecasting.
#
def test_time_trend_smoke(index, forecast_index):
    """Smoke-test TimeTrend: forecasting, str/hash, attributes and equality.

    Parameters
    ----------
    index
        Fixture supplying the in-sample index.
    forecast_index
        Fixture supplying the forecast index, or None. When None, 83 steps
        are forecast; otherwise one step per forecast index value.

    Notes
    -----
    A UserWarning is required from ``out_of_sample`` only when ``index`` is
    exactly a ``NumericIndex`` whose values are not spaced by 1.
    """
    import contextlib
    import warnings

    @contextlib.contextmanager
    def _expect_warning(category):
        # pytest.warns(None) is deprecated since pytest 7 and rejected by
        # pytest 8. When no specific warning is expected, record and discard
        # whatever is emitted (the lenient behaviour the old call provided)
        # instead of passing None through to pytest.
        if category is None:
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always")
                yield
        else:
            with pytest.warns(category):
                yield

    tt = TimeTrend(True, 2)
    tt.in_sample(index)
    steps = 83 if forecast_index is None else len(forecast_index)

    warn = None
    # Exact type check is deliberate: subclasses of NumericIndex are excluded.
    if type(index) is NumericIndex and np.any(np.diff(index) != 1):
        warn = UserWarning
    with _expect_warning(warn):
        tt.out_of_sample(steps, index, forecast_index)

    # String conversion and hashing must not raise.
    str(tt)
    hash(tt)
    assert isinstance(tt.order, int)
    assert isinstance(tt._constant, bool)

    # Only the "ctt" specification equals a constant + order-2 trend.
    assert TimeTrend.from_string("ctt") == tt
    assert TimeTrend.from_string("ct") != tt
    assert TimeTrend.from_string("t") != tt
    assert TimeTrend.from_string("n") != tt
    assert Seasonality(12) != tt

    # A trend with no constant and order 0 must still be usable.
    tt0 = TimeTrend(False, 0)
    tt0.in_sample(index)
    str(tt0)
def test_time_trend(index):
    """Verify TimeTrend in-sample and out-of-sample output for several orders.

    Parameters
    ----------
    index
        Fixture supplying the index the trend terms are generated for.

    Notes
    -----
    A UserWarning is required from ``out_of_sample`` only when ``index`` is
    exactly a ``NumericIndex`` whose values are not spaced by 1.
    """
    import contextlib
    import warnings

    @contextlib.contextmanager
    def _expect_warning(category):
        # pytest.warns(None) is deprecated since pytest 7 and rejected by
        # pytest 8. When no specific warning is expected, record and discard
        # whatever is emitted (the lenient behaviour the old call provided)
        # instead of passing None through to pytest.
        if category is None:
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always")
                yield
        else:
            with pytest.warns(category):
                yield

    # Constant only: a single column of ones aligned with the index.
    tt = TimeTrend(constant=True)
    const = tt.in_sample(index)
    assert const.shape == (index.shape[0], 1)
    assert np.all(const == 1)
    pd.testing.assert_index_equal(const.index, index)

    warn = None
    # Exact type check is deliberate: subclasses of NumericIndex are excluded.
    if type(index) is NumericIndex and np.any(np.diff(index) != 1):
        warn = UserWarning
    with _expect_warning(warn):
        const_fcast = tt.out_of_sample(23, index)
    assert np.all(const_fcast == 1)

    # No constant and default order: no columns at all.
    tt = TimeTrend(constant=False)
    empty = tt.in_sample(index)
    assert empty.shape == (index.shape[0], 0)

    # Order 2 without a constant: linear and squared trend columns.
    tt = TimeTrend(constant=False, order=2)
    t2 = tt.in_sample(index)
    assert t2.shape == (index.shape[0], 2)
    assert list(t2.columns) == ["trend", "trend_squared"]

    # Constant plus order 2 equals the two pieces side by side.
    tt = TimeTrend(constant=True, order=2)
    final = tt.in_sample(index)
    expected = pd.concat([const, t2], axis=1)
    pd.testing.assert_frame_equal(final, expected)

    # In-sample on all but the last 50 observations plus a 50-step forecast
    # must reproduce the full in-sample frame.
    tt = TimeTrend(constant=True, order=2)
    short = tt.in_sample(index[:-50])
    with _expect_warning(warn):
        remainder = tt.out_of_sample(50, index[:-50])
    with _expect_warning(warn):
        direct = tt.out_of_sample(
            steps=50, index=index[:-50], forecast_index=index[-50:]
        )

    combined = pd.concat([short, remainder], axis=0)
    # The forecast without an explicit forecast_index is compared only for
    # these two index types.
    if isinstance(index, (pd.DatetimeIndex, pd.RangeIndex)):
        pd.testing.assert_frame_equal(combined, final)

    combined = pd.concat([short, direct], axis=0)
    pd.testing.assert_frame_equal(combined, final, check_index_type=False)
def test_invalid_formcast_index(index):
    """out_of_sample rejects a forecast_index whose length differs from steps.

    Ten steps are requested while the supplied forecast index holds eleven
    values, which must raise ValueError.
    """
    trend = TimeTrend(order=4)
    n_steps = 10
    mismatched_forecast_index = pd.RangeIndex(11)
    expected_msg = "The number of values in forecast_"

    with pytest.raises(ValueError, match=expected_msg):
        trend.out_of_sample(n_steps, index, mismatched_forecast_index)