Exemple #1
0
def test_determintic_term_equiv(index):
    """Check that terms do not depend on the kind of index supplied.

    A process built on the ``index`` fixture is compared against one built
    on ``pd.RangeIndex(0, 200)`` with the same settings; the in-sample,
    out-of-sample and ``range`` outputs must hold identical values.
    """
    settings = {"constant": True, "order": 2}
    reference = DeterministicProcess(pd.RangeIndex(0, 200), **settings)
    candidate = DeterministicProcess(index, **settings)

    np.testing.assert_array_equal(
        reference.in_sample(), candidate.in_sample()
    )
    np.testing.assert_array_equal(
        reference.out_of_sample(37), candidate.out_of_sample(37)
    )
    # Purely out-of-sample, purely in-sample, and a span straddling both.
    for start, stop in ((200, 237), (50, 150), (50, 250)):
        np.testing.assert_array_equal(
            reference.range(start, stop), candidate.range(start, stop)
        )
def test_drop_two_consants(time_index):
    """Check that a doubled constant collapses when ``drop=True``.

    The constant is requested both directly and through a ``TimeTrend``
    additional term. The result must have exactly two columns and equal the
    frame produced when only the ``TimeTrend`` supplies the constant.
    """
    trend = TimeTrend(constant=True, order=1)

    doubled = DeterministicProcess(
        time_index, constant=True, additional_terms=[trend], drop=True
    )
    n_columns = doubled.in_sample().shape[1]
    assert n_columns == 2

    single = DeterministicProcess(
        time_index, additional_terms=[trend], drop=True
    )
    pd.testing.assert_frame_equal(doubled.in_sample(), single.in_sample())
def test_range_casting():
    """Check that a NumPy integer array index behaves like a ``RangeIndex``.

    Two processes with identical settings are built, one on
    ``np.arange(120)`` and one on ``pd.RangeIndex(0, 120)``; their in-sample
    frames and their ``range(100, 150)`` frames must be equal.
    """
    settings = {"constant": True, "order": 1, "seasonal": True, "period": 12}
    from_array = DeterministicProcess(np.arange(120), **settings)
    from_range = DeterministicProcess(pd.RangeIndex(0, 120), **settings)

    pd.testing.assert_frame_equal(
        from_array.in_sample(), from_range.in_sample()
    )
    pd.testing.assert_frame_equal(
        from_array.range(100, 150), from_range.range(100, 150)
    )
Exemple #4
0
def test_additional_terms(time_index):
    """Check ``additional_terms`` equivalence and duplicate detection.

    A ``TimeTrend`` passed through ``additional_terms`` must give the same
    in-sample frame as ``constant=True, order=1``. Supplying the same term
    twice, or alongside the equivalent keyword settings, must raise
    ``ValueError``.
    """
    extra = [TimeTrend(True, order=1)]
    via_terms = DeterministicProcess(time_index, additional_terms=extra)
    via_keywords = DeterministicProcess(time_index, constant=True, order=1)
    pd.testing.assert_frame_equal(
        via_terms.in_sample(), via_keywords.in_sample()
    )

    expected_message = "One or more terms in additional_terms"

    # The same term listed twice.
    with pytest.raises(ValueError, match=expected_message):
        DeterministicProcess(time_index, additional_terms=extra + extra)

    # The term combined with the keyword settings used above.
    with pytest.raises(ValueError, match=expected_message):
        DeterministicProcess(
            time_index, constant=True, order=1, additional_terms=extra
        )
Exemple #5
0
    def check(self, dp, X):
        """Validate a submitted ``dp`` and ``X`` against a reference solution.

        A reference ``DeterministicProcess`` is built on the index of
        ``load_average_sales()['2017']``. The submitted ``dp`` is then
        inspected setting by setting (through its private attributes), and
        the submitted ``X`` is compared with the reference ``in_sample()``
        output via ``assert_equal``.

        Parameters
        ----------
        dp
            The submitted ``DeterministicProcess``.
        X
            The submitted feature frame.

        Raises
        ------
        AssertionError
            If ``dp`` is not a ``DeterministicProcess`` or any inspected
            setting differs from the reference. ``assert_equal`` is defined
            elsewhere and is presumably what rejects a mismatching ``X``.
        """
        import pandas as pd
        from statsmodels.tsa.deterministic import (CalendarFourier,
                                                   DeterministicProcess)

        y = load_average_sales()['2017']
        fourier = CalendarFourier(freq='M', order=4)
        # The reference must live under its own name: binding it to `dp`
        # would overwrite the submission and make every assertion below
        # inspect the reference instead of the learner's object.
        dp_true = DeterministicProcess(
            index=y.index,
            constant=True,
            order=1,
            seasonal=True,
            additional_terms=[fourier],
            drop=True,
        )
        X_true = dp_true.in_sample()

        assert isinstance(
            dp, DeterministicProcess
        ), f"`dp` should be a `DeterministicProcess`. You gave {type(dp).__name__}."
        # `Index.equals` returns False on a length mismatch, whereas an
        # element-wise `==` would raise instead of failing the assertion.
        assert pd.Index(dp._index).equals(
            y.index
        ), f"`index` argument to `DeterministicProcess` should be `y.index`. You gave {dp._index}."
        assert dp._constant, f"`constant` argument to `DeterministicProcess` should be `True`. You gave {dp._constant}."
        assert dp._order == 1, f"`order` argument to `DeterministicProcess` should be `1`. You gave {dp._order}."
        assert dp._seasonal, f"`seasonal` argument to `DeterministicProcess` should be `True`. You gave {dp._seasonal}."
        assert len(
            dp._additional_terms
        ) == 1, f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
        assert isinstance(
            dp._additional_terms[0], CalendarFourier
        ), f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
        assert dp._additional_terms[
            0]._order == 4, f"`order` argument to `CalendarFourier` should be `4`. You gave {dp._additional_terms[0]._order}."
        assert isinstance(
            dp._additional_terms[0]._freq, pd.offsets.MonthEnd
        ), f"`freq` argument to `CalendarFourier` should be `'M'`."
        assert dp._drop, f"`drop` argument to `DeterministicProcess` should be `True`. You gave {dp._drop}."
        assert_equal(X, X_true, 'X')
Exemple #6
0
def test_drop():
    """Check ``drop=True`` with a ``DummyTerm`` additional term.

    Both the in-sample and the 37-step out-of-sample frames must have four
    columns with matching names, and each of the labels ``const``,
    ``trend``, ``dummy`` and ``normal`` must occur in exactly one column
    name.
    """
    index = pd.RangeIndex(0, 200)
    dummy = DummyTerm()
    # Exercise the string conversion and the inequality comparison.
    str(dummy)
    assert dummy != TimeTrend()

    process = DeterministicProcess(
        index, additional_terms=[dummy], drop=True
    )
    in_sample = process.in_sample()
    assert in_sample.shape == (200, 4)

    forecast = process.out_of_sample(37)
    assert forecast.shape == (37, 4)
    assert list(forecast.columns) == list(in_sample.columns)

    for label in ("const", "trend", "dummy", "normal"):
        matching = [name for name in forecast if label in name]
        assert len(matching) == 1
Exemple #7
0
def test_deterministic_process(time_index, constant, order, seasonal, fourier,
                               period, drop):
    """Smoke-test ``DeterministicProcess`` over a grid of settings.

    The in-sample frame must carry ``time_index`` as its index, and a
    23-step out-of-sample call must return a ``DataFrame``. Combinations
    where both ``seasonal`` and ``fourier`` are truthy are not exercised.
    """
    if seasonal and fourier:
        # This combination is not covered by the test.
        return

    options = {
        "constant": constant,
        "order": order,
        "seasonal": seasonal,
        "fourier": fourier,
        "period": period,
        "drop": drop,
    }
    process = DeterministicProcess(time_index, **options)

    in_sample_terms = process.in_sample()
    pd.testing.assert_index_equal(in_sample_terms.index, time_index)

    forecast_terms = process.out_of_sample(23)
    assert isinstance(forecast_terms, pd.DataFrame)
# The process requires an index, which is the index of the full-sample (or
# in-sample).
#
# First, we initialize a deterministic process with a constant, a linear
# time trend, and a 5-period seasonal term. The `in_sample` method returns
# the full set of values that match the index.

from statsmodels.tsa.deterministic import DeterministicProcess

index = pd.RangeIndex(start=0, stop=100)
det_proc = DeterministicProcess(
    index, constant=True, order=1, seasonal=True, period=5
)
det_proc.in_sample()

# The `out_of_sample` method returns the next `steps` values after the end
# of the in-sample period.

det_proc.out_of_sample(15)

# `range(start, stop)` can also be used to produce the deterministic terms
# over any range including in- and out-of-sample.
#
# ### Notes
#
# * When the index is a pandas `DatetimeIndex` or a `PeriodIndex`, then
# `start` and `stop` can be date-like (strings, e.g., "2020-06-01", or
# Timestamp) or integers.
# * `stop` is always included in the range. While this is not very