Example #1
# p_values and d_values are not shown in this snippet; presumably they are
# defined like q_values (an assumption, not from the original code).
p_values = range(0, 3)
d_values = range(0, 3)
q_values = range(0, 3)

S = range(0, 3)
s_values = [0, 11, 77]

warnings.filterwarnings("ignore")

hourly_arima_res = evaluate_pdq(bestillingHourlySeries, p_values, d_values,
                                q_values)
hourly_sarimax_res = evaluate_PDQs(bestillingHourlySeries, S, S, S, s_values,
                                   hourly_arima_res[0])

# --- SARIMAX: what the best model looks like for daily calls on the full dataset ---
daily_model = sx.SARIMAX(bestillingDailySeries,
                         exog=None,
                         order=(7, 1, 0),
                         seasonal_order=(1, 1, 1, 7),
                         trend='t')
daily_model_fit = daily_model.fit(disp=0)
yhat = daily_model_fit.fittedvalues
print(daily_model_fit.summary())
# plot residual errors
residuals = DataFrame(daily_model_fit.resid)
residuals.plot()
pyplot.suptitle('Residuals for SARIMAX(7,1,0)(1,1,1,7)')
pyplot.show()
residuals.plot(kind='kde')
pyplot.suptitle('Residuals for SARIMAX(7,1,0)(1,1,1,7)')
pyplot.show()
print(residuals.describe())
pyplot.plot(bestillingDailySeries, 'k-', label='actual calls', alpha=0.7)
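The helpers evaluate_pdq / evaluate_PDQs used above are not shown in this
snippet. A minimal sketch of what an AIC-based order search of that shape
might look like (a hypothetical helper: the name, the AIC criterion and the
pandas-Series input are assumptions, not part of the original code):

import itertools
import statsmodels.tsa.statespace.sarimax as sx

def evaluate_pdq_sketch(series, p_values, d_values, q_values):
    """Return ((p, d, q), aic) for the best-AIC SARIMAX fit on `series`."""
    best = None
    for p, d, q in itertools.product(p_values, d_values, q_values):
        try:
            fit = sx.SARIMAX(series, order=(p, d, q)).fit(disp=0)
        except Exception:
            continue  # skip orders that fail to estimate
        if best is None or fit.aic < best[1]:
            best = ((p, d, q), fit.aic)
    return best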
Example #2
def test_varmax():
    # Clear warnings
    varmax.__warningregistry__ = {}

    np.random.seed(371934)
    nobs = 100
    eps = np.random.normal(size=nobs)
    exog = np.random.normal(size=(nobs, 1))

    eps1 = np.zeros(nobs)
    eps2 = np.zeros(nobs)
    eps2[49] = 1
    eps3 = np.zeros(nobs)
    eps3[50:] = 1

    # VAR(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='nc')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 0))
    actual = mod1.simulate([0.5, 0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VMA(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='nc')
    mod2 = sarimax.SARIMAX([0], order=(0, 0, 2))
    actual = mod1.simulate([0.5, 0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VARMA(2, 2) - single series
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='nc')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2))
    actual = mod1.simulate([0.5, 0.2, 0.1, -0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 0.1, -0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VARMA(2, 2) + trend - single series
    mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c')
    actual = mod1.simulate([10, 0.5, 0.2, 0.1, -0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([10, 0.5, 0.2, 0.1, -0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VAR(1)
    transition = np.array([[0.5, 0.1], [-0.1, 0.2]])

    mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='nc')
    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, 0)

    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=[1, 1])
    desired = np.zeros((nobs, 2))
    state = np.r_[1, 1]
    for i in range(nobs):
        desired[i] = state
        state = np.dot(transition, state)
    assert_allclose(actual, desired)

    # VAR(1) + measurement error
    mod = varmax.VARMAX([[0, 0]],
                        order=(1, 0),
                        trend='nc',
                        measurement_error=True)
    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1., 1., 1.],
                          nobs,
                          measurement_shocks=np.c_[eps, eps],
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, np.c_[eps, eps])

    # VARX(1)
    mod = varmax.VARMAX(np.zeros((nobs, 2)),
                        order=(1, 0),
                        trend='nc',
                        exog=exog)
    actual = mod.simulate(np.r_[transition.ravel(), 5, -2, 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=[1, 1])
    desired = np.zeros((nobs, 2))
    state = np.r_[1, 1]
    for i in range(nobs):
        desired[i] = state
        state = exog[i] * [5, -2] + np.dot(transition, state)
    assert_allclose(actual, desired)

    # VMA(1)
    # TODO: This is just a smoke test
    mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)),
                        order=(0, 1),
                        trend='nc')
    mod.simulate(mod.start_params, nobs)

    # VARMA(2, 2) + trend + exog
    # TODO: This is just a smoke test
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)),
                            order=(2, 2),
                            trend='c',
                            exog=exog)
    mod.simulate(mod.start_params, nobs)
Example #3
def test_simulate():
    # Test for simulation of new time-series
    from scipy.signal import lfilter

    # Common parameters
    nsimulations = 10
    sigma2 = 2
    measurement_shocks = np.zeros(nsimulations)
    state_shocks = np.random.normal(scale=sigma2**0.5, size=nsimulations)

    # Random walk model, so simulated series is just the cumulative sum of
    # the shocks
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.

    actual = mod.simulate(nsimulations,
                          measurement_shocks=measurement_shocks,
                          state_shocks=state_shocks)[0].squeeze()
    desired = np.r_[0, np.cumsum(state_shocks)[:-1]]

    assert_allclose(actual, desired)

    # Local level model, so simulated series is just the cumulative sum of
    # the shocks plus the measurement shock
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.

    actual = mod.simulate(nsimulations,
                          measurement_shocks=np.ones(nsimulations),
                          state_shocks=state_shocks)[0].squeeze()
    desired = np.r_[1, np.cumsum(state_shocks)[:-1] + 1]

    assert_allclose(actual, desired)

    # Local level-like model with observation and state intercepts, so
    # simulated series is just the cumulative sum of the shocks minus the state
    # intercept, plus the observation intercept and the measurement shock
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['obs_intercept', 0, 0] = 5.
    mod['design', 0, 0] = 1.
    mod['state_intercept', 0, 0] = -2.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.

    actual = mod.simulate(nsimulations,
                          measurement_shocks=np.ones(nsimulations),
                          state_shocks=state_shocks)[0].squeeze()
    desired = np.r_[1 + 5, np.cumsum(state_shocks - 2)[:-1] + 1 + 5]

    assert_allclose(actual, desired)

    # Model with time-varying observation intercept
    mod = KalmanFilter(k_endog=1, k_states=1, nobs=10)
    mod['obs_intercept'] = (np.arange(10) * 1.).reshape(1, 10)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.

    actual = mod.simulate(nsimulations,
                          measurement_shocks=measurement_shocks,
                          state_shocks=state_shocks)[0].squeeze()
    desired = np.r_[0, np.cumsum(state_shocks)[:-1] + np.arange(1, 10)]

    assert_allclose(actual, desired)

    # Model with time-varying observation intercept, check that error is raised
    # if more simulations are requested than are nobs.
    mod = KalmanFilter(k_endog=1, k_states=1, nobs=10)
    mod['obs_intercept'] = (np.arange(10) * 1.).reshape(1, 10)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    assert_raises(ValueError, mod.simulate, nsimulations + 1,
                  measurement_shocks, state_shocks)

    # ARMA(1, 1): phi = [0.1], theta = [0.5], sigma^2 = 2
    phi = 0.1
    theta = 0.5
    mod = sarimax.SARIMAX([0], order=(1, 0, 1))
    mod.update(np.r_[phi, theta, sigma2])

    actual = mod.ssm.simulate(nsimulations,
                              measurement_shocks=measurement_shocks,
                              state_shocks=state_shocks,
                              initial_state=np.zeros(
                                  mod.k_states))[0].squeeze()
    desired = lfilter([1, theta], [1, -phi], np.r_[0, state_shocks[:-1]])

    assert_allclose(actual, desired)

    # SARIMAX(1, 0, 1)x(1, 0, 1, 4), this time using the results object call
    mod = sarimax.SARIMAX([0.1, 0.5, -0.2],
                          order=(1, 0, 1),
                          seasonal_order=(1, 0, 1, 4))
    res = mod.filter([0.1, 0.5, 0.2, -0.3, 1])

    actual = res.simulate(nsimulations,
                          measurement_shocks=measurement_shocks,
                          state_shocks=state_shocks,
                          initial_state=np.zeros(mod.k_states))
    desired = lfilter(res.polynomial_reduced_ma, res.polynomial_reduced_ar,
                      np.r_[0, state_shocks[:-1]])

    assert_allclose(actual, desired)
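The ARMA checks above rest on the identity that an ARMA process driven only
by state shocks equals an lfilter pass whose numerator is the MA polynomial
and whose denominator is the AR polynomial. A small standalone check of that
identity (synthetic shocks; the coefficients are just illustrative):

import numpy as np
from scipy.signal import lfilter

phi, theta = 0.1, 0.5
shocks = np.random.normal(size=10)

# Build y_t = phi*y_{t-1} + e_t + theta*e_{t-1} by direct recursion
y = np.zeros(10)
e_prev = 0.0
for t, e in enumerate(shocks):
    y[t] = (phi * y[t - 1] if t > 0 else 0.0) + e + theta * e_prev
    e_prev = e

assert np.allclose(y, lfilter([1, theta], [1, -phi], shocks))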
Example #4
def test_start_end_int(which):
    endog = dta['infl'].copy()
    nobs = len(endog)
    if which == 'range':
        endog.index = pd.RangeIndex(nobs)
        endog_init = endog.iloc[:-1]
        index_plus2 = pd.RangeIndex(nobs + 2)
    elif which == 'range2':
        endog.index = pd.RangeIndex(stop=nobs * 2, step=2)
        endog_init = endog.iloc[:-1]
        index_plus2 = pd.RangeIndex((nobs + 2) * 2, step=2)
    elif which == 'int64':
        endog.index = NumericIndex(np.arange(nobs))
        endog_init = endog.iloc[:-1]
        index_plus2 = NumericIndex(np.arange(nobs + 2))
    elif which == 'numpy':
        endog = endog.values
        endog_init = endog[:-1]
        index_plus2 = pd.RangeIndex(nobs + 2)
    elif which == 'list':
        endog = endog.tolist()
        endog_init = endog[:-1]
        index_plus2 = pd.RangeIndex(nobs + 2)

    mod = sarimax.SARIMAX(endog_init)
    res = mod.smooth([0.5, 1.0])

    # Default is the last in-sample period
    news = res.news(endog)
    desired = index_plus2[-4:-3]
    assert_(news.total_impacts.index.equals(desired))

    # Start, periods
    news = res.news(endog, start=mod.nobs - 1, periods=1)
    desired = index_plus2[-4:-3]
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, start=mod.nobs - 2, periods=2)
    desired = index_plus2[-5:-3]
    assert_(news.total_impacts.index.equals(desired))

    # End, periods
    news = res.news(endog, end=mod.nobs - 1, periods=1)
    desired = index_plus2[-4:-3]
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, end=mod.nobs - 2, periods=2)
    desired = index_plus2[-6:-4]
    assert_(news.total_impacts.index.equals(desired))

    # Start, end
    # Note: end is inclusive, like `get_prediction`.
    news = res.news(endog, start=mod.nobs - 2, end=mod.nobs - 1)
    desired = index_plus2[-5:-3]
    assert_(news.total_impacts.index.equals(desired))
    if which not in ['numpy', 'list']:
        predicted = res.predict(start=mod.nobs - 2, end=mod.nobs - 1)
        assert_(news.total_impacts.index.equals(predicted.index))

    news = res.news(endog, start=mod.nobs, end=mod.nobs)
    desired = index_plus2[-3:-2]
    assert_(news.total_impacts.index.equals(desired))
    if which not in ['numpy', 'list']:
        predicted = res.predict(start=mod.nobs, end=mod.nobs)
        assert_(news.total_impacts.index.equals(predicted.index))

    news = res.news(endog, start=mod.nobs, end=mod.nobs + 1)
    desired = index_plus2[-3:-1]
    assert_(news.total_impacts.index.equals(desired))
    if which not in ['numpy', 'list']:
        predicted = res.predict(start=mod.nobs, end=mod.nobs + 1)
        assert_(news.total_impacts.index.equals(predicted.index))
def test_mixed_stationary():
    # More specific tests when one or more blocks are initialized as stationary
    endog = np.zeros(10)
    mod = sarimax.SARIMAX(endog, order=(2, 1, 0))
    phi = [0.5, -0.2]
    sigma2 = 2.
    mod.update(np.r_[phi, sigma2])

    init = Initialization(mod.k_states)
    init.set(0, 'diffuse')
    init.set((1, 3), 'stationary')
    desired_cov = np.zeros((3, 3))
    T = np.array([[0.5, 1], [-0.2, 0]])
    Q = np.diag([sigma2, 0])
    desired_cov[1:, 1:] = solve_discrete_lyapunov(T, Q)
    check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov)

    init.clear()
    init.set(0, 'diffuse')
    init.set(1, 'stationary')
    init.set(2, 'approximate_diffuse')
    T = np.array([[0.5]])
    Q = np.diag([sigma2])
    desired_cov = np.diag([0, solve_discrete_lyapunov(T, Q)[0, 0], 1e6])
    check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov)

    init.clear()
    init.set(0, 'diffuse')
    init.set(1, 'stationary')
    init.set(2, 'stationary')
    desired_cov[2, 2] = 0
    check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov)

    # Test with a VAR model
    endog = np.zeros((10, 2))
    mod = varmax.VARMAX(
        endog,
        order=(1, 0),
    )
    intercept = [1.5, -0.1]
    transition = np.array([[0.5, -0.2], [0.1, 0.8]])
    cov = np.array([[1.2, -0.4], [-0.4, 0.4]])
    tril = np.tril_indices(2)
    params = np.r_[intercept,
                   transition.ravel(),
                   np.linalg.cholesky(cov)[tril]]
    mod.update(params)

    # > stationary, global
    init = Initialization(mod.k_states, 'stationary')
    desired_intercept = np.linalg.solve(np.eye(2) - transition, intercept)
    desired_cov = solve_discrete_lyapunov(transition, cov)
    check_initialization(mod, init, desired_intercept, np.diag([0, 0]),
                         desired_cov)

    # > diffuse, global
    init.set(None, 'diffuse')
    check_initialization(mod, init, [0, 0], np.eye(2), np.diag([0, 0]))

    # > stationary, individually
    init.unset(None)
    init.set(0, 'stationary')
    init.set(1, 'stationary')
    a, Pinf, Pstar = init(model=mod)
    desired_intercept = [
        intercept[0] / (1 - transition[0, 0]),
        intercept[1] / (1 - transition[1, 1])
    ]
    desired_cov = np.diag([
        cov[0, 0] / (1 - transition[0, 0]**2),
        cov[1, 1] / (1 - transition[1, 1]**2)
    ])
    check_initialization(mod, init, desired_intercept, np.diag([0, 0]),
                         desired_cov)
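The stationary initializations above rely on solve_discrete_lyapunov, which
returns the P solving P = T P T' + Q. A quick standalone check of that
property, reusing the VAR(1) matrices from the block above:

import numpy as np
from scipy.linalg import solve_discrete_lyapunov

T = np.array([[0.5, -0.2], [0.1, 0.8]])
Q = np.array([[1.2, -0.4], [-0.4, 0.4]])
P = solve_discrete_lyapunov(T, Q)
assert np.allclose(P, T @ P @ T.T + Q)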
def sarimax_model(timeseries, seasonality_idx, mdl_order, fcst_window, ts_start, ts_end, verbose, exog=None):

    sarimax_fcst = pd.DataFrame()

    if exog is None:

        try:
            mod = smsar.SARIMAX(endog=timeseries,
                                trend='n',
                                order=mdl_order.order,
                                seasonal_order=mdl_order.sorder+seasonality_idx)

            sarimax_mdl = mod.fit(disp=False)
            if verbose is True:
                print("SARIMA Info: Default Params - Endogenous Mode")

        except ValueError:
            mod = smsar.SARIMAX(endog=timeseries,
                                trend='n',
                                order=mdl_order.order,
                                seasonal_order=(0, 1, 0, seasonality_idx[0]))
            sarimax_mdl = mod.fit(disp=False)
            if verbose is True:
                print("SARIMA Info: Custom Params - Endogenous Mode")

        sarimax_rslt = sarimax_mdl.predict(alpha=0.05,
                                           start=0,
                                           end=(len(timeseries)-1)+fcst_window)
        sarimax_rslt[12] = np.mean([sarimax_rslt[12-1], sarimax_rslt[12+1]])  # PATCH for buggy value
        sarimax_rslt_info = sarimax_mdl.get_prediction(end=(len(timeseries)-1)+fcst_window)
        sarimax_ci = sarimax_rslt_info.conf_int(alpha=0.05)
        sarimax_ci.columns = ['lower', 'upper']
        sarimax_ci.lower[12] = np.mean([sarimax_ci.lower[12 - 1], sarimax_ci.lower[12 + 1]])
        sarimax_ci.upper[12] = np.mean([sarimax_ci.upper[12 - 1], sarimax_ci.upper[12 + 1]])  # End of PATCH
        sarimax_fcst = pd.concat([timeseries, sarimax_rslt, sarimax_ci], axis=1)
        sarimax_fcst.columns = ['Actual', 'Forecast', 'CI Lower Bound', 'CI Upper Bound']
        # y_pred = sarimax_fcst.loc[ts_start:ts_end]
        # sarimax_mase = mase_score(timeseries, y_pred.Forecast, seasonality_idx, 1)  # remove first buggy value
        # sarimax_mape = mape_score(timeseries, y_pred.Forecast, 1)  # remove first buggy value
        # print("MASE Score = {0:.2f}, MAPE Score = {1:.2f}".format(sarimax_mase, sarimax_mape))
        # if sarimax_mase < 1 and sarimax_mape < 10:
        #     print("SARIMA Info: Forecasting Accuracy is OK")
        # else:
        #     print("SARIMA Info: Forecasting Accuracy is not OK, check the forecast results")

    elif exog is not None:
        # shape exogenous time series for past values
        ts_start_exog = pd.to_datetime([ts_start])
        ts_end_exog = pd.to_datetime([ts_end])
        ts_exog_past = exog[ts_start_exog[0]:ts_end_exog[0]]

        try:
            mod = smsar.SARIMAX(endog=timeseries,
                                exog=ts_exog_past,
                                trend='n',
                                order=mdl_order.order,
                                seasonal_order=mdl_order.sorder + seasonality_idx)

            sarimax_mdl = mod.fit(disp=False)
            if verbose is True:
                print("SARIMA Info: Default Params - Exogenous Mode")

        except ValueError:
            mod = smsar.SARIMAX(endog=timeseries,
                                exog=ts_exog_past,
                                trend='n',
                                order=mdl_order.order,
                                seasonal_order=(0, 1, 0, seasonality_idx[0]))

            sarimax_mdl = mod.fit(disp=False)
            if verbose is True:
                print("SARIMA Info: Custom Params - Exogenous Mode")

        # shape exogenous time series for future values
        ts_start_exog = pd.to_datetime([ts_end]) + DateOffset(months=1)
        ts_end_exog = pd.to_datetime([ts_end]) + DateOffset(months=fcst_window)
        ts_exog_future = exog[ts_start_exog[0]:ts_end_exog[0]]
        np_exog = np.array(ts_exog_future)

        # forecast the time series using exogenous factors
        sarimax_rslt = sarimax_mdl.predict(alpha=0.05,
                                           start=0,
                                           end=(len(timeseries) - 1) + fcst_window,
                                           exog=np_exog)
        sarimax_rslt[12] = np.mean([sarimax_rslt[12-1], sarimax_rslt[12+1]])  # PATCH for buggy value
        sarimax_fcst = pd.concat([timeseries, sarimax_rslt], axis=1)
        sarimax_fcst.columns = ['Actual', 'Forecast']

    return sarimax_fcst
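The "PATCH" lines above repair one bad prediction by averaging its two
neighbours. A standalone sketch of that interpolation step (illustrative
data; index 12 mirrors the patch above):

import numpy as np
import pandas as pd

pred = pd.Series(np.linspace(0.0, 20.0, 21))
pred[12] = 999.0                                  # stand-in for the buggy value
pred[12] = np.mean([pred[12 - 1], pred[12 + 1]])  # neighbour average, as in the patch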
Example #7
def test_sarimax_time_invariant(revisions, updates):
    # Construct previous and updated datasets
    endog = dta['infl'].copy()
    comparison_type = None
    if updates:
        endog1 = endog.loc[:'2009Q2'].copy()
        endog2 = endog.loc[:'2009Q3'].copy()
    else:
        endog1 = endog.loc[:'2009Q3'].copy()
        endog2 = endog.loc[:'2009Q3'].copy()
        # Without updates and without NaN values, we need to specify that
        # the type of the comparison object that we're passing is "updated"
        comparison_type = 'updated'
    if revisions:
        endog1.iloc[-1] = 0.

    # Get the previous results object and compute the news
    mod = sarimax.SARIMAX(endog1)
    res = mod.smooth([0.5, 1.0])
    news = res.news(endog2, start='2009Q2', end='2010Q1',
                    comparison_type=comparison_type)

    # Compute the true values for each combination of (revisions, updates)
    impact_dates = pd.period_range(start='2009Q2', end='2010Q1', freq='Q')
    impacted_variables = ['infl']

    # Revisions
    if revisions and updates:
        revisions_index = pd.MultiIndex.from_arrays(
            [endog1.index[-1:], ['infl']],
            names=['revision date', 'revised variable'])
        # If we have updates, the revision is to 2009Q2
        revision_impacts = endog2.iloc[-2] * 0.5**np.arange(4).reshape(4, 1)
    elif revisions:
        revisions_index = pd.MultiIndex.from_arrays(
            [endog1.index[-1:], ['infl']],
            names=['revision date', 'revised variable'])
        # With no updates, the revision is to 2009Q3
        revision_impacts = np.r_[
            0, endog2.iloc[-1] * 0.5**np.arange(3)].reshape(4, 1)
    else:
        revisions_index = pd.MultiIndex.from_arrays(
            [[], []], names=['revision date', 'revised variable'])
        revision_impacts = None

    # Updates
    if updates:
        updates_index = pd.MultiIndex.from_arrays(
            [pd.period_range(start='2009Q3', periods=1, freq='Q'), ['infl']],
            names=['update date', 'updated variable'])
        update_impacts = np.array([[
            0, endog.loc['2009Q3'] - 0.5 * endog.loc['2009Q2'],
            0.5 * endog.loc['2009Q3'] - 0.5**2 * endog.loc['2009Q2'],
            0.5**2 * endog.loc['2009Q3'] - 0.5**3 * endog.loc['2009Q2']]]).T
    else:
        updates_index = pd.MultiIndex.from_arrays(
            [[], []], names=['update date', 'updated variable'])
        update_impacts = None

    # Impact forecasts
    if updates:
        prev_impacted_forecasts = np.r_[
            endog1.iloc[-1] * 0.5**np.arange(4)].reshape(4, 1)
    else:
        prev_impacted_forecasts = np.r_[
            endog1.iloc[-2], endog1.iloc[-1] * 0.5**np.arange(3)].reshape(4, 1)
    post_impacted_forecasts = np.r_[
        endog2.iloc[-2], 0.5 ** np.arange(3) * endog2.iloc[-1]].reshape(4, 1)

    # News
    if updates:
        # Note: update_forecasts is created using the endog2 dataset even if
        # there were revisions, because it should be computed after revisions
        # have already been taken into account
        update_forecasts = [0.5 * endog2.loc['2009Q2']]
        update_realized = [endog2.loc['2009Q3']]
        news_desired = [update_realized[i] - update_forecasts[i]
                        for i in range(len(update_forecasts))]
        weights = pd.DataFrame(np.r_[0, 0.5**np.arange(3)]).T
    else:
        update_forecasts = pd.Series([], dtype=np.float64)
        update_realized = pd.Series([], dtype=np.float64)
        news_desired = pd.Series([], dtype=np.float64)
        weights = pd.DataFrame(np.zeros((0, 4)))

    # Run unit tests
    check_news(news, revisions, updates, impact_dates, impacted_variables,
               revisions_index, updates_index,
               revision_impacts, update_impacts,
               prev_impacted_forecasts, post_impacted_forecasts,
               update_forecasts, update_realized, news_desired, weights)
def fun(i, k):

    # Redirect stdout so the forecasts printed below land in the output CSV
    sys.stdout = open(str(k + 1) + '/' + str(i - 1) + '.csv', "w")

    dateparse = lambda dates: pd.to_datetime(dates, format='%H-%M-%S.%f')
    data = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv')

    # Series.from_csv was removed from pandas; build the series with read_csv
    ts = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv',
                     header=0, index_col=0).squeeze("columns")
    info = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv')

    num_of_for = 10  #NUMBER OF FORECASTS

    flag = 0
    count = 0

    s = info['#Passengers'].values

    for z in s:
        if z == 0:
            count = count + 1

    if count >= 0.99 * len(ts):
        flag = 1

    if flag == 1:
        for i in range(num_of_for):
            print(0)

    else:

        def fun(ser):
            co = 0
            c1 = 0
            for e in ser[len(ser) - 12:]:
                if e == 0:
                    co = co + 1
                if e == 1:
                    c1 = c1 + 1
                if co > c1:
                    ser[len(ser) - 3] = 1
                if co < c1:
                    ser[len(ser) - 3] = 0
            return ser

        tsmod = fun(ts)
        ts_log = np.sqrt(tsmod)

        moving_avg = ts_log.rolling(12).mean()

        ts_log_moving_avg_diff = ts_log - moving_avg

        ts_log_diff = ts_log - ts_log.shift()

        from statsmodels.tsa.seasonal import seasonal_decompose
        decomposition = seasonal_decompose(ts_log, freq=2)

        trend = decomposition.trend
        seasonal = decomposition.seasonal
        residual = decomposition.resid

        ts_log_decompose = residual

        from statsmodels.tsa.statespace import sarimax

        model = sarimax.SARIMAX(ts_log,
                                order=(4, 1, 1),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
        results_ARIMA = model.fit()

        predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues,
                                           copy=True)

        predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

        predictions_ARIMA_log = pd.Series(ts_log.iloc[0], index=ts_log.index)
        predictions_ARIMA_log = predictions_ARIMA_log.add(
            predictions_ARIMA_diff_cumsum, fill_value=0)
        predictions_ARIMA_log.head()

        predictions_ARIMA = np.square(predictions_ARIMA_log)

        start_index = len(ts)
        end_index = len(ts)

        forecast = results_ARIMA.forecast(steps=num_of_for)

        for p in forecast:
            print(p)

        sys.stdout.close()
Example #9
def get_pa_indecies():
    a = acf(train["y"], nlags=240)
    p = pacf(train["y"], nlags=240)
    ab = a[np.abs(a) > (1.96 / np.sqrt(len(train["y"])))]
    pb = p[np.abs(p) > (1.96 / np.sqrt(len(train["y"])))]
    print("acf")
    print(np.where(np.isin(a, ab)))
    print("pacf")
    print(np.where(np.isin(p, pb)))
    return None


#get_pa_indecies()

mod = sarimax.SARIMAX(train_bam["bam"].values,
                      trend='n',
                      order=(2, 0, 1),
                      seasonal_order=(2, 1, 0, 180))
results = mod.fit()
#print(results.summary())

test_bam['forecast'] = results.forecast(104)
fig = plt.figure(figsize=(12, 8))
print("train_bam: " + str(test_bam["forecast"].shape))
print("test_bam: " + str(test_bam["bam"].shape))
plt.plot(train_bam["ds"], train_bam['bam'])
plt.plot(test_bam["ds"], test_bam["forecast"])
plt.show()

print("The Root Mean Squared Error is: " + str(
    np.sqrt(metrics.mean_squared_error(test_bam["bam"], test_bam["forecast"])))
      )
Example #10
max_date = data.period.max()
min_date = data.period.min()

num_of_actual_points = data.index.shape[0]
num_of_expected_points = (max_date.year - min_date.year) * 12 + max_date.month - min_date.month + 1

print("Date range: {} - {}".format(min_date.strftime("%d.%m.%Y"), max_date.strftime("%d.%m.%Y")))
print("Number of data points: {} of expected {}".format(num_of_actual_points, num_of_expected_points))


max_date = df.period.max()
min_date = df.period.min()

num_of_actual_points = df.index.shape[0]
num_of_expected_points = (max_date.year - min_date.year) * 12 + max_date.month - min_date.month + 1

print("Date range: {} - {}".format(min_date.strftime("%d.%m.%Y"), max_date.strftime("%d.%m.%Y")))
print("Number of data points: {} of expected {}".format(num_of_actual_points, num_of_expected_points))


from statsmodels.tsa.statespace import sarimax
model = sarimax.SARIMAX()
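# The call above is incomplete: SARIMAX requires at least an endogenous
# series. A minimal working sketch on the monthly data loaded above (the
# column name 'value' and the orders are assumptions, not from the original):
endog = df.set_index('period')['value']
model = sarimax.SARIMAX(endog, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit(disp=False)
print(results.summary())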

def test_smoothed_decomposition_sarimax(use_exog, trend, concentrate_scale,
                                        measurement_error):
    endog = np.array([[0.2, np.nan, 1.2, -0.3, -1.5]]).T
    exog = np.array([2, 5.3, -1, 3.4, 0.]) if use_exog else None

    trend_params = [0.1]
    ar_params = [0.5]
    exog_params = [1.4]
    meas_err_params = [1.2]
    cov_params = [0.8]

    params = []
    if trend in ['c', 't']:
        params += trend_params
    if use_exog:
        params += exog_params
    params += ar_params
    if measurement_error:
        params += meas_err_params
    if not concentrate_scale:
        params += cov_params

    # Fit the models
    mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend,
                          exog=exog if use_exog else None,
                          concentrate_scale=concentrate_scale,
                          measurement_error=measurement_error)
    prior_mean = np.array([-0.4])
    prior_cov = np.eye(1) * 1.2
    mod.ssm.initialize_known(prior_mean, prior_cov)
    res = mod.smooth(params)

    # Check smoothed state

    # Get the decomposition of the smoothed state
    cd, coi, csi, cp = res.get_smoothed_decomposition(
        decomposition_of='smoothed_state')

    # Sum across contributions (i.e. from observations at each time period and
    # from the initial state)
    css = ((cd + coi).sum(axis=1) + csi.sum(axis=1) + cp.sum(axis=1))
    css = css.unstack(level='state_to').values

    # Summing up all contributions should yield the actual smoothed state,
    # so the smoothed state vector is the desired result of this test
    ss = np.array(res.states.smoothed)

    assert_allclose(css, ss, atol=1e-12)

    # Check smoothed signal

    # Use the summed state contributions and multiply by the design matrix
    # to get the smoothed signal
    csf = ((css.T * mod['design'][:, :, None]).sum(axis=1)
           + mod['obs_intercept']).T

    # Summing up all contributions should yield the smoothed prediction of
    # the observed variables
    s_sig = res.predict(information_set='smoothed', signal_only=True)
    sf = res.predict(information_set='smoothed', signal_only=False)

    assert_allclose(csf[:, 0], sf)

    # Now check the smoothed signal against the sum computed from the
    # decomposed smoothed signal
    cd, coi, csi, cp = res.get_smoothed_decomposition(
        decomposition_of='smoothed_signal')

    # Sum across contributions (i.e. from observations and intercepts at each
    # time period and from the initial state) to get the smoothed signal
    cs_sig = ((cd + coi).sum(axis=1) + csi.sum(axis=1) + cp.sum(axis=1))
    cs_sig = cs_sig.unstack(level='variable_to').values

    assert_allclose(cs_sig[:, 0], s_sig, atol=1e-12)

    # Add in the observation intercept to get the smoothed forecast
    csf = cs_sig + mod['obs_intercept'].T

    assert_allclose(csf[:, 0], sf)
def test_fit():
    # Test that fitting works regardless of the level of memory conservation
    # used
    endog = dta['infl'].iloc[:20]
    mod = sarimax.SARIMAX(endog, order=(1, 0, 0), concentrate_scale=True)

    res = mod.fit(disp=False)

    options_smooth = [
        'memory_no_forecast', 'memory_no_filtered', 'memory_no_likelihood',
        'memory_no_std_forecast'
    ]
    for option in options_smooth:
        mod.ssm.set_conserve_memory(0)
        setattr(mod.ssm, option, True)
        res2 = mod.fit(res.params, disp=False)

        # General check that smoothing results are available
        assert_allclose(res2.smoothed_state, res.smoothed_state, atol=1e-10)

        # Specific checks for each type
        if option == 'memory_no_forecast':
            assert_(res2.forecasts is None)
            assert_(res2.forecasts_error is None)
            assert_(res2.forecasts_error_cov is None)
        else:
            assert_allclose(res2.forecasts, res.forecasts)
            assert_allclose(res2.forecasts_error, res.forecasts_error)
            assert_allclose(res2.forecasts_error_cov, res.forecasts_error_cov)

        if option == 'memory_no_filtered':
            assert_(res2.filtered_state is None)
            assert_(res2.filtered_state_cov is None)
        else:
            assert_allclose(res2.filtered_state, res.filtered_state)
            assert_allclose(res2.filtered_state_cov, res.filtered_state_cov)

        assert_allclose(res2.llf, res.llf)
        if option == 'memory_no_likelihood':
            assert_(res2.llf_obs is None)
        else:
            assert_allclose(res2.llf_obs, res.llf_obs)

        if option == 'memory_no_std_forecast':
            assert_(res2.standardized_forecasts_error is None)
        else:
            assert_allclose(res2.standardized_forecasts_error,
                            res.standardized_forecasts_error)

    options_filter_only = [
        'memory_no_predicted', 'memory_no_gain', 'memory_no_smoothing',
        'memory_conserve'
    ]
    for option in options_filter_only[2:]:
        mod.ssm.set_conserve_memory(0)
        setattr(mod.ssm, option, True)
        res2 = mod.fit(res.params, disp=False)

        # General check that smoothing results are not available
        assert_(res2.smoothed_state is None)

        # Specific checks for each type
        if option in ['memory_no_predicted', 'memory_conserve']:
            assert_(res2.predicted_state is None)
            assert_(res2.predicted_state_cov is None)
        else:
            assert_allclose(res2.predicted_state, res.predicted_state)
            assert_allclose(res2.predicted_state_cov, res.predicted_state_cov)

        if option in ['memory_no_gain', 'memory_conserve']:
            assert_(res2.filter_results._kalman_gain is None)
        else:
            assert_allclose(res2.filter_results.kalman_gain,
                            res.filter_results.kalman_gain)
Example #13
def test_sarimax_time_varying(revisions, updates, which):
    # This is primarily a test that the `news` method works with a time-varying
    # setup (i.e. time-varying state space matrices). It tests a time-varying
    # SARIMAX model where the time-varying component has been set to zeros
    # against a time-invariant version of the model.

    # Construct previous and updated datasets
    endog = dta['infl'].copy()
    comparison_type = None
    if updates:
        endog1 = endog.loc[:'2009Q2'].copy()
        endog2 = endog.loc[:'2009Q3'].copy()
    else:
        endog1 = endog.loc[:'2009Q3'].copy()
        endog2 = endog.loc[:'2009Q3'].copy()
        # Without updates and without NaN values, we need to specify that
        # the type of the comparison object that we're passing is "updated"
        comparison_type = 'updated'
    if revisions:
        endog1.iloc[-1] = 0.

    exog1 = None
    exog2 = None
    trend = 'n'
    if which == 'exog':
        exog1 = np.ones_like(endog1)
        exog2 = np.ones_like(endog2)
    elif which == 'trend':
        trend = 't'

    # Compute the news from a model with a trend/exog term (so the model is
    # time-varying), but with the coefficient set to zero (so that it will be
    # equivalent to the time-invariant model)
    mod1 = sarimax.SARIMAX(endog1, exog=exog1, trend=trend)
    res1 = mod1.smooth([0., 0.5, 1.0])
    news1 = res1.news(endog2,
                      exog=exog2,
                      start='2008Q1',
                      end='2009Q3',
                      comparison_type=comparison_type)

    # Compute the news from a model without a trend term
    mod2 = sarimax.SARIMAX(endog1)
    res2 = mod2.smooth([0.5, 1.0])
    news2 = res2.news(endog2,
                      start='2008Q1',
                      end='2009Q3',
                      comparison_type=comparison_type)

    attrs = [
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts', 'revisions_iloc',
        'revisions_ix', 'updates_iloc', 'updates_ix'
    ]

    for attr in attrs:
        w = getattr(news1, attr)
        x = getattr(news2, attr)
        if isinstance(x, pd.Series):
            assert_series_equal(w, x)
        else:
            assert_frame_equal(w, x)
Example #14
def get_results_with_val(df,
                         exo,
                         p,
                         d,
                         q,
                         P,
                         D,
                         Q,
                         s,
                         model,
                         y_col_name,
                         val_size_perc,
                         n_predictions=5):
    """Fit SARIMAX on input df (optional input and future exo regr) and predict validation + future values
    Or use param fitted model (optional input and future exo regr) to predict validation + future values
    Plot input and output (val+future) predictions

    Parameters
    ----------
    df : DataFrame
        Y Time Series
    exo : DataFrame, optional
        Exogenous Regressors to model Y
    p : int
        AR parameter for the SARIMAX on Y
    d : int
        Integrated parameter for the SARIMAX on Y
    q : int
        MA parameter for the SARIMAX on Y
    P : int
        Seasonal AR parameter for the SARIMAX on Y
    D : int
        Seasonal Integrated parameter for the SARIMAX on Y
    Q : int
        Seasonal MA parameter for the SARIMAX on Y
    s : int
        Seasonality timeframe for Y
    model : SARIMAX Fitted model, optional
        Pre-fitted SARIMAX model to use to predict Y values
    y_col_name : String
        Column name of Y values
    val_size_perc : Float
        Fraction of the df to use for validation.
        Format: [0.0;1.0]
    n_predictions : int, optional
        Number of future values to predict for Y, by default 5

    Returns
    -------
    smodel: pmdarima.arima.ARIMA
        Fitted SARIMAX model on Y
    results: DataFrame
        DataFrame including the train, validation and forecast values from the SARIMAX fitted model on Y Time Series
    """

    X = df[y_col_name].values
    Y = df["Date"].values
    train_size = int(len(X) * (1 - val_size_perc))
    train, test = X[:train_size], X[train_size:len(X)]
    week = Y[train_size:len(X)]
    exo_past, exo_future = None, None

    # Split Exo Regressor into past (train + val) and future (forecast) values
    if exo is not None:
        exo_past, exo_future = exo[:len(X)], exo[len(X):len(exo)]

    # Create SARIMAX model or use input model
    print("Checking model for fit...")
    if model is None:
        print("No input model, starting to fit SARIMAX" + str(p) + str(d) +
              str(q) + str(P) + str(D) + str(Q) + str(s))
        # pass the seasonal order too, matching the message printed above
        smodel = pmdarima.arima.ARIMA(order=[p, d, q],
                                      seasonal_order=[P, D, Q, s],
                                      method="lbfgs",
                                      maxiter=50,
                                      suppress_warnings=True)
        smodel = smodel.fit(df[y_col_name].values, exo_past)
        print("Finished SARIMAX fit.")
    else:
        print("Existing input model, will use it")
        smodel = model

    # Test model on the Validation set
    history = [x for x in train]
    predictions = list()
    for t in range(len(test)):
        model = sarimax.SARIMAX(history,
                                order=smodel.order,
                                seasonal_order=smodel.seasonal_order,
                                enforce_stationarity=False)
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        if output[0] < 0:
            yhat = 0
        else:
            yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        print("predicted=%f, expected=%f" % (yhat, obs))
    error = metrics.mean_squared_error(test, predictions)
    print("Test MSE: %.3f" % error)

    # Add Train set to output
    data = pd.DataFrame()
    data["Date"] = Y[0:train_size]
    data["Predicted Net Order Value"] = None
    data["Actual Net Order Value"] = X[0:train_size]
    data["Classification"] = "train"

    # Add Validation set to output
    Tested = pd.DataFrame()
    Tested["Date"] = week
    Tested["Predicted Net Order Value"] = predictions
    Tested["Actual Net Order Value"] = test
    Tested["Classification"] = "test"
    Tested["Predicted Net Order Value"] = Tested[
        "Predicted Net Order Value"].astype(float)
    Tested["Date"] = pd.to_datetime(Tested["Date"])

    # Add Forecast set to output
    print("Predicting forecast values...")
    n_periods = n_predictions
    fitted, confint = smodel.predict(n_periods=n_periods,
                                     return_conf_int=True,
                                     exogenous=exo_future)
    print("Finished predicting forecast values.")
    rng = pd.date_range(df["Date"].max(), periods=n_periods, freq="7D")
    forecast = pd.DataFrame({
        "Date": rng,
        "Predicted Net Order Value": fitted,
        "Actual Net Order Value": None,
        "Classification": "forecast",
        "Conf_lower": confint[:, 0],
        "Conf_Upper": confint[:, 1],
    })
    forecast = forecast.drop(forecast.index[0])

    # Combine all sets
    # DataFrame.append was removed in pandas 2.0, so use pd.concat
    results = pd.concat([data, Tested, forecast], ignore_index=True)
    # Reformat Dates to Date type
    results["Date"] = pd.to_datetime(results["Date"])
    return smodel, results
def test_varmax():
    steps = 10

    # Clear warnings
    varmax.__warningregistry__ = {}

    # VAR(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 0))
    actual = mod1.impulse_responses([0.5, 0.2, 1], steps)
    desired = mod2.impulse_responses([0.5, 0.2, 1], steps)
    assert_allclose(actual, desired)

    # VMA(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(0, 0, 2))
    actual = mod1.impulse_responses([0.5, 0.2, 1], steps)
    desired = mod2.impulse_responses([0.5, 0.2, 1], steps)
    assert_allclose(actual, desired)

    # VARMA(2, 2) - single series
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2))
    actual = mod1.impulse_responses([0.5, 0.2, 0.1, -0.2, 1], steps)
    desired = mod2.impulse_responses([0.5, 0.2, 0.1, -0.2, 1], steps)
    assert_allclose(actual, desired)

    # VARMA(2, 2) + trend - single series
    warning = EstimationWarning
    match = r'VARMA\(p,q\) models is not'
    with pytest.warns(warning, match=match):
        mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c')
    actual = mod1.impulse_responses([10, 0.5, 0.2, 0.1, -0.2, 1], steps)
    desired = mod2.impulse_responses([10, 0.5, 0.2, 0.1, -0.2, 1], steps)
    assert_allclose(actual, desired)

    # VAR(2) + constant
    # Stata:
    # webuse lutkepohl2
    # var dln_inv dln_inc, lags(1/2)
    # irf create irf3, set(irf3) step(10)
    # irf table irf
    # irf table oirf
    params = [
        -.00122728, .01503679, -.22741923, .71030531, -.11596357, .51494891,
        .05974659, .02094608, .05635125, .08332519, .04297918, .00159473,
        .01096298
    ]
    irf_00 = [
        1, -.227419, -.021806, .093362, -.001875, -.00906, .009605, .001323,
        -.001041, .000769, .00032
    ]
    irf_01 = [
        0, .059747, .044015, -.008218, .007845, .004629, .000104, .000451,
        .000638, .000063, .000042
    ]
    irf_10 = [
        0, .710305, .36829, -.065697, .084398, .043038, .000533, .005755,
        .006051, .000548, .000526
    ]
    irf_11 = [
        1, .020946, .126202, .066419, .028735, .007477, .009878, .003287,
        .001266, .000986, .0005
    ]
    oirf_00 = [
        0.042979, -0.008642, -0.00035, 0.003908, 0.000054, -0.000321, 0.000414,
        0.000066, -0.000035, 0.000034, 0.000015
    ]
    oirf_01 = [
        0.001595, 0.002601, 0.002093, -0.000247, 0.000383, 0.000211, 0.00002,
        0.000025, 0.000029, 4.30E-06, 2.60E-06
    ]
    oirf_10 = [
        0, 0.007787, 0.004037, -0.00072, 0.000925, 0.000472, 5.80E-06,
        0.000063, 0.000066, 6.00E-06, 5.80E-06
    ]
    oirf_11 = [
        0.010963, 0.00023, 0.001384, 0.000728, 0.000315, 0.000082, 0.000108,
        0.000036, 0.000014, 0.000011, 5.50E-06
    ]

    mod = varmax.VARMAX([[0, 0]], order=(2, 0), trend='c')

    # IRFs
    actual = mod.impulse_responses(params, steps, impulse=0)
    assert_allclose(actual, np.c_[irf_00, irf_01], atol=1e-6)

    actual = mod.impulse_responses(params, steps, impulse=1)
    assert_allclose(actual, np.c_[irf_10, irf_11], atol=1e-6)

    # Orthogonalized IRFs
    actual = mod.impulse_responses(params,
                                   steps,
                                   impulse=0,
                                   orthogonalized=True)
    assert_allclose(actual, np.c_[oirf_00, oirf_01], atol=1e-6)

    actual = mod.impulse_responses(params,
                                   steps,
                                   impulse=1,
                                   orthogonalized=True)
    assert_allclose(actual, np.c_[oirf_10, oirf_11], atol=1e-6)

    # VARMA(2, 2) + trend + exog
    # TODO: This is just a smoke test
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod = varmax.VARMAX(np.random.normal(size=(steps, 2)),
                            order=(2, 2),
                            trend='c',
                            exog=np.ones(steps),
                            enforce_stationarity=False,
                            enforce_invertibility=False)
    mod.impulse_responses(mod.start_params, steps)
Example #16
# The series has a seasonal component.
# We shall run pmdarima's auto_arima to get the orders.

from pmdarima import auto_arima

auto_arima(df['Employees'], seasonal=True, max_p=2, max_q=2, max_d=2,
           m=12).summary()

Ltrain = 12
train = df[:-Ltrain]
test = df[-Ltrain:]

from statsmodels.tsa.statespace import sarimax

result = sarimax.SARIMAX(train['Employees'],
                         order=(1, 1, 2),
                         seasonal_order=(1, 0, 1, 12),
                         enforce_invertibility=False).fit()
result.summary()
prediction = result.predict(start=len(train), end=len(df) - 1,
                            typ='levels').rename('SARIMA(1, 1, 2)')

ax = df['Employees'].plot(legend=True)
prediction.plot(legend=True)
plt.show()

len(test)
len(prediction)
from statsmodels.tools.eval_measures import rmse

rmse(test['Employees'], prediction)
test['Employees'].mean()
import pandas as pd
df = pd.read_csv("weather_data_train_labels.csv",parse_dates =['datetime'],
                           sep =';',decimal=',',infer_datetime_format=True)
print(df.dtypes)
temp_df = df[["datetime","U_mu"]]
print()
from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot as plt
# ETS decomposition
result = seasonal_decompose(temp_df["U_mu"],model='multiplicative',extrapolate_trend='freq',freq=365)
result.plot()
#plt.show()
from statsmodels.tsa.statespace import sarimax
model = sarimax.SARIMAX(temp_df["U_mu"],order = (0, 1, 1),seasonal_order =(2, 1, 1, 12))
result = model.fit()
print(result.summary())
pred = result.get_prediction(start=pd.to_datetime('2012-09-20'), dynamic=False)
pred_ci = pred.conf_int()
print(pred, pred_ci)


Example #18
def Sarimax(ts, order, seasonal_order):
    fit = sms.SARIMAX(ts, order=order, seasonal_order=seasonal_order).fit()
    fcst = fit.predict(start=121, end=127, dynamic=True)
    return fcst
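A hypothetical usage sketch for the Sarimax() wrapper above, assuming `sms`
is statsmodels.tsa.statespace.sarimax and a monthly series of 128 points, so
that the hard-coded start=121/end=127 window lies in sample (synthetic data):

import numpy as np
import pandas as pd
import statsmodels.tsa.statespace.sarimax as sms

ts = pd.Series(np.random.randn(128).cumsum() + 50,
               index=pd.date_range('2010-01-01', periods=128, freq='MS'))
fcst = Sarimax(ts, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
print(fcst)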
def test_smoothed_state_obs_weights_sarimax(use_exog, trend,
                                            concentrate_scale,
                                            measurement_error):
    endog = np.array([[0.2, np.nan, 1.2, -0.3, -1.5]]).T
    exog = np.array([2, 5.3, -1, 3.4, 0.]) if use_exog else None

    trend_params = [0.1]
    ar_params = [0.5]
    exog_params = [1.4]
    meas_err_params = [1.2]
    cov_params = [0.8]

    params = []
    if trend in ['c', 't']:
        params += trend_params
    if use_exog:
        params += exog_params
    params += ar_params
    if measurement_error:
        params += meas_err_params
    if not concentrate_scale:
        params += cov_params

    # Fit the models
    mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend,
                          exog=exog if use_exog else None,
                          concentrate_scale=concentrate_scale,
                          measurement_error=measurement_error)
    prior_mean = np.array([-0.4])
    prior_cov = np.eye(1) * 1.2
    mod.ssm.initialize_known(prior_mean, prior_cov)
    res = mod.smooth(params)

    # Compute the desired weights
    n = mod.nobs
    m = mod.k_states
    p = mod.k_endog

    desired = np.zeros((n, n, m, p)) * np.nan
    # Here we manually compute the weights by adjusting one observation at a
    # time
    for j in range(n):
        for i in range(p):
            if np.isnan(endog[j, i]):
                desired[:, j, :, i] = np.nan
            else:
                y = endog.copy()
                y[j, i] += 1.0
                tmp_mod = sarimax.SARIMAX(y, order=(1, 0, 0), trend=trend,
                                          exog=exog if use_exog else None,
                                          concentrate_scale=concentrate_scale,
                                          measurement_error=measurement_error)
                tmp_mod.ssm.initialize_known(prior_mean, prior_cov)
                tmp_res = tmp_mod.smooth(params)

                desired[:, j, :, i] = (tmp_res.smoothed_state.T
                                       - res.smoothed_state.T)

    desired_state_intercept_weights = np.zeros((n, n, m, m)) * np.nan
    # Here we manually compute the weights by adjusting one state intercept
    # at a time
    for j in range(n):
        for ell in range(m):
            tmp_mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend,
                                      exog=exog if use_exog else None,
                                      concentrate_scale=concentrate_scale,
                                      measurement_error=measurement_error)
            tmp_mod.ssm.initialize_known(prior_mean, prior_cov)
            tmp_mod.update(params)
            if tmp_mod['state_intercept'].ndim == 1:
                si = tmp_mod['state_intercept']
                tmp_mod['state_intercept'] = np.zeros((mod.k_states, mod.nobs))
                tmp_mod['state_intercept', :, :] = si
            tmp_mod['state_intercept', ell, j] += 1.0
            tmp_res = tmp_mod.ssm.smooth()

            desired_state_intercept_weights[:, j, :, ell] = (
                tmp_res.smoothed_state.T - res.smoothed_state.T)

    desired_prior_weights = np.zeros((n, m, m)) * np.nan
    # Here we manually compute the weights by adjusting one prior element at
    # a time
    for i in range(m):
        a = prior_mean.copy()
        a[i] += 1
        tmp_mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend,
                                  exog=exog if use_exog else None,
                                  concentrate_scale=concentrate_scale,
                                  measurement_error=measurement_error)
        tmp_mod.ssm.initialize_known(a, prior_cov)
        tmp_res = tmp_mod.smooth(params)

        desired_prior_weights[:, :, i] = (tmp_res.smoothed_state.T
                                          - res.smoothed_state.T)

    mod.ssm.initialize_known(prior_mean, prior_cov)
    actual, actual_state_intercept_weights, actual_prior_weights = (
        tools.compute_smoothed_state_weights(res))

    assert_allclose(actual, desired, atol=1e-12)
    assert_allclose(actual_state_intercept_weights,
                    desired_state_intercept_weights, atol=1e-12)
    assert_allclose(actual_prior_weights, desired_prior_weights, atol=1e-12)
Example #20
def test_append_extend_apply_invalid():
    # Test for invalid options to append, extend, and apply
    niledata = nile.data.load_pandas().data['volume']
    niledata.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    endog1 = niledata.iloc[:20]
    endog2 = niledata.iloc[20:40]

    mod = sarimax.SARIMAX(endog1, order=(1, 0, 0), concentrate_scale=True)
    res1 = mod.smooth([0.5])

    assert_raises(ValueError,
                  res1.append,
                  endog2,
                  fit_kwargs={'cov_type': 'approx'})
    assert_raises(ValueError,
                  res1.extend,
                  endog2,
                  fit_kwargs={'cov_type': 'approx'})
    assert_raises(ValueError,
                  res1.apply,
                  endog2,
                  fit_kwargs={'cov_type': 'approx'})

    assert_raises(ValueError, res1.append, endog2, fit_kwargs={'cov_kwds': {}})
    assert_raises(ValueError, res1.extend, endog2, fit_kwargs={'cov_kwds': {}})
    assert_raises(ValueError, res1.apply, endog2, fit_kwargs={'cov_kwds': {}})

    # Test for exception when given a different frequency
    wrong_freq = niledata.iloc[20:40]
    wrong_freq.index = pd.date_range(start=niledata.index[0],
                                     periods=len(wrong_freq),
                                     freq='MS')
    message = ('Given `endog` does not have an index that extends the index of'
               ' the model. Expected index frequency is')
    with pytest.raises(ValueError, match=message):
        res1.append(wrong_freq)
    with pytest.raises(ValueError, match=message):
        res1.extend(wrong_freq)
    message = ('Given `exog` does not have an index that extends the index of'
               ' the model. Expected index frequency is')
    with pytest.raises(ValueError, match=message):
        res1.append(endog2, exog=wrong_freq)
    message = 'The indices for endog and exog are not aligned'
    with pytest.raises(ValueError, match=message):
        res1.extend(endog2, exog=wrong_freq)

    # Test for exception when given the same frequency but not right after the
    # end of model
    not_cts = niledata.iloc[21:41]
    message = ('Given `endog` does not have an index that extends the index of'
               ' the model.$')
    with pytest.raises(ValueError, match=message):
        res1.append(not_cts)
    with pytest.raises(ValueError, match=message):
        res1.extend(not_cts)
    message = ('Given `exog` does not have an index that extends the index of'
               ' the model.$')
    with pytest.raises(ValueError, match=message):
        res1.append(endog2, exog=not_cts)
    message = 'The indices for endog and exog are not aligned'
    with pytest.raises(ValueError, match=message):
        res1.extend(endog2, exog=not_cts)

    # Test for problems with non-date indexes
    endog3 = pd.Series(niledata.iloc[:20].values)
    endog4 = pd.Series(niledata.iloc[:40].values)[20:]
    mod2 = sarimax.SARIMAX(endog3,
                           order=(1, 0, 0),
                           exog=endog3,
                           concentrate_scale=True)
    res2 = mod2.smooth([0.2, 0.5])

    # Test for exception when given the same frequency but not right after the
    # end of model
    not_cts = pd.Series(niledata[:41].values)[21:]
    message = ('Given `endog` does not have an index that extends the index of'
               ' the model.$')
    with pytest.raises(ValueError, match=message):
        res2.append(not_cts)
    with pytest.raises(ValueError, match=message):
        res2.extend(not_cts)
    message = ('Given `exog` does not have an index that extends the index of'
               ' the model.$')
    with pytest.raises(ValueError, match=message):
        res2.append(endog4, exog=not_cts)
    message = 'The indices for endog and exog are not aligned'
    with pytest.raises(ValueError, match=message):
        res2.extend(endog4, exog=not_cts)
Example #21
def test_start_end_dates(use_periods):
    endog = dta['infl'].copy()
    if use_periods:
        index_range = pd.period_range
    else:
        def index_range(*args, **kwargs):
            return pd.period_range(*args, **kwargs).to_timestamp(freq='Q')
        endog = endog.to_timestamp(freq='Q')
    mod = sarimax.SARIMAX(endog.iloc[:-1])
    res = mod.smooth([0.5, 1.0])

    # Default is the first out-of-sample period
    news = res.news(endog)
    desired = index_range(start='2009Q2', periods=1, freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # Start (dates), periods
    news = res.news(endog, start='2009Q1', periods=1)
    desired = index_range(start='2009Q1', periods=1, freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, start='2009Q1', periods=2)
    desired = index_range(start='2009Q1', periods=2, freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # Start (int), periods
    news = res.news(endog, start=mod.nobs - 1, periods=1)
    desired = index_range(start='2009Q2', periods=1, freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, start=mod.nobs - 2, periods=2)
    desired = index_range(start='2009Q1', periods=2, freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # End (dates), periods
    news = res.news(endog, end='2009Q1', periods=1)
    desired = index_range(end='2009Q1', periods=1, freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, end='2009Q1', periods=2)
    desired = index_range(end='2009Q1', periods=2, freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # End (int), periods
    news = res.news(endog, end=mod.nobs - 1, periods=1)
    desired = index_range(end='2009Q2', periods=1, freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, end=mod.nobs - 2, periods=2)
    desired = index_range(end='2009Q1', periods=2, freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # Start (dates), end (dates)
    news = res.news(endog, start='2009Q1', end='2009Q1')
    desired = index_range(start='2009Q1', end='2009Q1', freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    news = res.news(endog, start='2009Q1', end='2009Q2')
    desired = index_range(start='2009Q1', end='2009Q2', freq='Q')
    assert_(news.total_impacts.index.equals(desired))

    # Start (dates), end (int)
    news = res.news(endog, start='2009Q1', end=mod.nobs - 2)
    desired = index_range(start='2009Q1', end='2009Q1', freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    predicted = res.predict(start='2009Q1', end=mod.nobs - 2)
    assert_(news.total_impacts.index.equals(predicted.index))
    news = res.news(endog, start='2009Q1', end=mod.nobs - 1)
    desired = index_range(start='2009Q1', end='2009Q2', freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    predicted = res.predict(start='2009Q1', end=mod.nobs - 1)
    assert_(news.total_impacts.index.equals(predicted.index))

    # Start (int), end (dates)
    news = res.news(endog, start=mod.nobs - 2, end='2009Q1')
    desired = index_range(start='2009Q1', end='2009Q1', freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    predicted = res.predict(start=mod.nobs - 2, end='2009Q1')
    assert_(news.total_impacts.index.equals(predicted.index))
    news = res.news(endog, start=mod.nobs - 2, end='2009Q2')
    desired = index_range(start='2009Q1', end='2009Q2', freq='Q')
    assert_(news.total_impacts.index.equals(desired))
    predicted = res.predict(start=mod.nobs - 2, end='2009Q2')
    assert_(news.total_impacts.index.equals(predicted.index))

    # Negative indexes
    # Note that negative indexes are always computed relative to the updated
    # sample, which in this case is 1 observation more than is in `mod.nobs`
    total_nobs = len(endog)
    assert_equal(total_nobs, mod.nobs + 1)

    # Start (dates), end (int)
    desired = index_range(start='2009Q1', end='2009Q1', freq='Q')
    for end in [mod.nobs - 2, total_nobs - 3, -3]:
        news = res.news(endog, start='2009Q1', end=end)
        assert_(news.total_impacts.index.equals(desired))
        # Note: predict does not allow negative indexing
        if end > 0:
            predicted = res.predict(start='2009Q1', end=end)
            assert_(news.total_impacts.index.equals(predicted.index))

    # Start (int), end (dates)
    desired = index_range(start='2009Q1', end='2009Q1', freq='Q')
    for start in [mod.nobs - 2, total_nobs - 3, -3]:
        news = res.news(endog, start=start, end='2009Q1')
        assert_(news.total_impacts.index.equals(desired))
        # Note: predict does not allow negative indexing
        if start > 0:
            predicted = res.predict(start=start, end='2009Q1')
            assert_(news.total_impacts.index.equals(predicted.index))
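The assertions above pin down which periods `news` reports on; a minimal sketch of the underlying call (assuming statsmodels >= 0.12, where `news` was added):

import numpy as np
import pandas as pd
from statsmodels.tsa.statespace import sarimax

endog = pd.Series(np.random.normal(size=40),
                  index=pd.period_range('2000Q1', periods=40, freq='Q'))
res = sarimax.SARIMAX(endog.iloc[:-1], order=(1, 0, 0)).smooth([0.5, 1.0])

news = res.news(endog)     # impact of the one newly observed quarter
print(news.total_impacts)  # defaults to the first out-of-sample period
print(news.summary())      # news, weights, and impacts in one table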
Exemple #22
0
def test_score_analytic_ar1():
    # Test the score against the analytic score for an AR(1) model with 2
    # observations
    # Let endog = [1, 0.5], params=[0, 1]
    mod = sarimax.SARIMAX([1, 0.5], order=(1, 0, 0))

    def partial_phi(phi, sigma2):
        return -0.5 * (phi**2 + 2 * phi * sigma2 - 1) / (sigma2 * (1 - phi**2))

    def partial_sigma2(phi, sigma2):
        return -0.5 * (2 * sigma2 + phi - 1.25) / (sigma2**2)

    params = np.r_[0., 2]

    # Compute the analytic score
    analytic_score = np.r_[partial_phi(params[0], params[1]),
                           partial_sigma2(params[0], params[1])]

    # Check each of the approximations, transformed parameters
    approx_cs = mod.score(params, transformed=True, approx_complex_step=True)
    assert_allclose(approx_cs, analytic_score)

    approx_fd = mod.score(params, transformed=True, approx_complex_step=False)
    assert_allclose(approx_fd, analytic_score, atol=1e-5)

    approx_fd_centered = (mod.score(params,
                                    transformed=True,
                                    approx_complex_step=False,
                                    approx_centered=True))
    assert_allclose(approx_fd_centered, analytic_score, atol=1e-5)

    harvey_cs = mod.score(params,
                          transformed=True,
                          method='harvey',
                          approx_complex_step=True)
    assert_allclose(harvey_cs, analytic_score)
    harvey_fd = mod.score(params,
                          transformed=True,
                          method='harvey',
                          approx_complex_step=False)
    assert_allclose(harvey_fd, analytic_score, atol=1e-5)
    harvey_fd_centered = mod.score(params,
                                   transformed=True,
                                   method='harvey',
                                   approx_complex_step=False,
                                   approx_centered=True)
    assert_allclose(harvey_fd_centered, analytic_score, atol=1e-5)

    # Check the approximations for untransformed parameters. The analytic
    # check now comes from chain rule with the analytic derivative of the
    # transformation
    # if L* is the likelihood evaluated at untransformed parameters and
    # L is the likelihood evaluated at transformed parameters, then we have:
    # L*(u) = L(t(u))
    # and then
    # L'*(u) = L'(t(u)) * t'(u)
    def partial_transform_phi(phi):
        return -1. / (1 + phi**2)**(3. / 2)

    def partial_transform_sigma2(sigma2):
        return 2. * sigma2

    uparams = mod.untransform_params(params)

    analytic_score = np.dot(
        np.diag(np.r_[partial_transform_phi(uparams[0]),
                      partial_transform_sigma2(uparams[1])]),
        np.r_[partial_phi(params[0], params[1]),
              partial_sigma2(params[0], params[1])])

    approx_cs = mod.score(uparams, transformed=False, approx_complex_step=True)
    assert_allclose(approx_cs, analytic_score)

    approx_fd = mod.score(uparams,
                          transformed=False,
                          approx_complex_step=False)
    assert_allclose(approx_fd, analytic_score, atol=1e-5)

    approx_fd_centered = (mod.score(uparams,
                                    transformed=False,
                                    approx_complex_step=False,
                                    approx_centered=True))
    assert_allclose(approx_fd_centered, analytic_score, atol=1e-5)

    harvey_cs = mod.score(uparams,
                          transformed=False,
                          method='harvey',
                          approx_complex_step=True)
    assert_allclose(harvey_cs, analytic_score)
    harvey_fd = mod.score(uparams,
                          transformed=False,
                          method='harvey',
                          approx_complex_step=False)
    assert_allclose(harvey_fd, analytic_score, atol=1e-5)
    harvey_fd_centered = mod.score(uparams,
                                   transformed=False,
                                   method='harvey',
                                   approx_complex_step=False,
                                   approx_centered=True)
    assert_allclose(harvey_fd_centered, analytic_score, atol=1e-5)

    # Check the Hessian: these approximations are not very good, particularly
    # when phi is close to 0
    params = np.r_[0.5, 1.]

    def hessian(phi, sigma2):
        hessian = np.zeros((2, 2))
        hessian[0, 0] = (-phi**2 - 1) / (phi**2 - 1)**2
        hessian[1, 0] = hessian[0, 1] = -1 / (2 * sigma2**2)
        hessian[1, 1] = (sigma2 + phi - 1.25) / sigma2**3
        return hessian

    analytic_hessian = hessian(params[0], params[1])

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        assert_allclose(mod._hessian_complex_step(params) * 2,
                        analytic_hessian,
                        atol=1e-1)
        assert_allclose(mod._hessian_finite_difference(params) * 2,
                        analytic_hessian,
                        atol=1e-1)
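An independent sanity check on the score methods exercised above is to difference the log-likelihood directly; a minimal sketch using central differences (step size chosen loosely, not tuned):

import numpy as np
from statsmodels.tsa.statespace import sarimax

mod = sarimax.SARIMAX([1, 0.5], order=(1, 0, 0))
params = np.r_[0., 2.]
h = 1e-6
score_fd = np.array([
    (mod.loglike(params + h * e) - mod.loglike(params - h * e)) / (2 * h)
    for e in np.eye(len(params))
])
print(score_fd)  # should be close to mod.score(params, transformed=True)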
Exemple #23
def test_mixed_basic():
    # Performs a number of tests for setting different initialization for
    # different blocks

    # - 2-dimensional -
    endog = np.zeros(10)
    mod = sarimax.SARIMAX(endog, order=(2, 0, 0))
    phi = [0.5, -0.2]
    sigma2 = 2.
    mod.update(np.r_[phi, sigma2])

    # known has constant
    init = Initialization(mod.k_states)
    init.set(0, 'known', constant=[1.2])

    # > known has constant
    init.set(1, 'known', constant=[-0.2])
    check_initialization(mod, init, [1.2, -0.2], np.diag([0, 0]),
                         np.diag([0, 0]))

    # > diffuse
    init.unset(1)
    init.set(1, 'diffuse')
    check_initialization(mod, init, [1.2, 0], np.diag([0, 1]), np.diag([0, 0]))

    # > approximate diffuse
    init.unset(1)
    init.set(1, 'approximate_diffuse')
    check_initialization(mod, init, [1.2, 0], np.diag([0, 0]),
                         np.diag([0, 1e6]))

    # > stationary
    init.unset(1)
    init.set(1, 'stationary')
    check_initialization(mod, init, [1.2, 0], np.diag([0, 0]), np.diag([0, 0]))

    # known has cov
    init = Initialization(mod.k_states)
    init.set(0, 'known', stationary_cov=np.diag([1]))
    init.set(1, 'diffuse')
    check_initialization(mod, init, [0, 0], np.diag([0, 1]), np.diag([1, 0]))

    # known has both
    init = Initialization(mod.k_states)
    init.set(0, 'known', constant=[1.2], stationary_cov=np.diag([1]))
    init.set(1, 'diffuse')
    check_initialization(mod, init, [1.2, 0], np.diag([0, 1]), np.diag([1, 0]))

    # - 3-dimensional -
    endog = np.zeros(10)
    mod = sarimax.SARIMAX(endog, order=(3, 0, 0))

    # known has constant
    init = Initialization(mod.k_states)
    init.set((0, 2), 'known', constant=[1.2, -0.2])
    init.set(2, 'diffuse')
    check_initialization(mod, init, [1.2, -0.2, 0], np.diag([0, 0, 1]),
                         np.diag([0, 0, 0]))

    # known has cov
    init = Initialization(mod.k_states)
    init.set((0, 2), 'known', stationary_cov=np.diag([1, 4.2]))
    init.set(2, 'diffuse')
    check_initialization(mod, init, [0, 0, 0], np.diag([0, 0, 1]),
                         np.diag([1, 4.2, 0]))

    # known has both
    init = Initialization(mod.k_states)
    init.set((0, 2),
             'known',
             constant=[1.2, -0.2],
             stationary_cov=np.diag([1, 4.2]))
    init.set(2, 'diffuse')
    check_initialization(mod, init, [1.2, -0.2, 0], np.diag([0, 0, 1]),
                         np.diag([1, 4.2, 0]))
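`check_initialization` above only inspects the implied moments; to actually filter under one of these mixed initializations, the object can be attached to the model's state space representation. A minimal sketch:

import numpy as np
from statsmodels.tsa.statespace import sarimax
from statsmodels.tsa.statespace.initialization import Initialization

mod = sarimax.SARIMAX(np.zeros(10), order=(2, 0, 0))
init = Initialization(mod.k_states)
init.set(0, 'known', constant=[1.2])
init.set(1, 'diffuse')
mod.ssm.initialization = init      # attach the mixed initialization
res = mod.smooth([0.5, -0.2, 2.])  # filter/smooth under it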
Exemple #24
def test_lutkepohl_information_criteria():
    # Setup dataset, use Lutkepohl data
    dta = pd.DataFrame(results_var_misc.lutkepohl_data,
                       columns=['inv', 'inc', 'consump'],
                       index=pd.date_range('1960-01-01',
                                           '1982-10-01',
                                           freq='QS'))

    dta['dln_inv'] = np.log(dta['inv']).diff()
    dta['dln_inc'] = np.log(dta['inc']).diff()
    dta['dln_consump'] = np.log(dta['consump']).diff()

    endog = dta.loc['1960-04-01':'1978-10-01',
                    ['dln_inv', 'dln_inc', 'dln_consump']]

    # AR model - SARIMAX
    # (use loglikelihood_burn=1 to mimic conditional MLE used by Stata's var
    # command).
    true = results_var_misc.lutkepohl_ar1_lustats
    mod = sarimax.SARIMAX(endog['dln_inv'],
                          order=(1, 0, 0),
                          trend='c',
                          loglikelihood_burn=1)
    res = mod.filter(true['params'])
    assert_allclose(res.llf, true['loglike'])
    # Test the Lutkepohl ICs
    # Note: for the Lutkepohl ICs, Stata only counts the AR coefficients as
    # estimated parameters for the purposes of information criteria, whereas we
    # count all parameters including scale and constant, so we need to adjust
    # for that
    aic = (res.info_criteria('aic', method='lutkepohl') -
           2 * 2 / res.nobs_effective)
    bic = (res.info_criteria('bic', method='lutkepohl') -
           2 * np.log(res.nobs_effective) / res.nobs_effective)
    hqic = (res.info_criteria('hqic', method='lutkepohl') -
            2 * 2 * np.log(np.log(res.nobs_effective)) / res.nobs_effective)
    assert_allclose(aic, true['aic'])
    assert_allclose(bic, true['bic'])
    assert_allclose(hqic, true['hqic'])

    # Test the non-Lutkepohl ICs
    # Note: for the non-Lutkepohl ICs, Stata does not count the scale as an
    # estimated parameter, but does count the constant term, for the
    # purposes of information criteria, whereas we count both, so we need to
    # adjust for that
    true = results_var_misc.lutkepohl_ar1
    aic = res.aic - 2
    bic = res.bic - np.log(res.nobs_effective)
    assert_allclose(aic, true['estat_aic'])
    assert_allclose(bic, true['estat_bic'])
    aic = res.info_criteria('aic') - 2
    bic = res.info_criteria('bic') - np.log(res.nobs_effective)
    assert_allclose(aic, true['estat_aic'])
    assert_allclose(bic, true['estat_bic'])

    # Note: could also test the "dfk" (degree of freedom corrections), but not
    # really necessary since they just rescale things a bit

    # VAR model - VARMAX
    # (use loglikelihood_burn=1 to mimic conditional MLE used by Stata's var
    # command).
    true = results_var_misc.lutkepohl_var1_lustats
    mod = varmax.VARMAX(
        endog,
        order=(1, 0),
        trend='n',
        error_cov_type='unstructured',
        loglikelihood_burn=1,
    )
    res = mod.filter(true['params'])
    assert_allclose(res.llf, true['loglike'])

    # Test the Lutkepohl ICs
    # Note: for the Lutkepohl ICs, Stata only counts the AR coefficients as
    # estimated parameters for the purposes of information criteria, whereas we
    # count all parameters including the elements of the covariance matrix, so
    # we need to adjust for that
    aic = (res.info_criteria('aic', method='lutkepohl') -
           2 * 6 / res.nobs_effective)
    bic = (res.info_criteria('bic', method='lutkepohl') -
           6 * np.log(res.nobs_effective) / res.nobs_effective)
    hqic = (res.info_criteria('hqic', method='lutkepohl') -
            2 * 6 * np.log(np.log(res.nobs_effective)) / res.nobs_effective)
    assert_allclose(aic, true['aic'])
    assert_allclose(bic, true['bic'])
    assert_allclose(hqic, true['hqic'])

    # Test the non-Lutkepohl ICs
    # Note: for the non-Lutkepohl ICs, Stata does not count the elements of the
    # covariance matrix as estimated parameters for the purposes of information
    # criteria, whereas we count both, so we need to adjust for that
    true = results_var_misc.lutkepohl_var1
    aic = res.aic - 2 * 6
    bic = res.bic - 6 * np.log(res.nobs_effective)
    assert_allclose(aic, true['estat_aic'])
    assert_allclose(bic, true['estat_bic'])
    aic = res.info_criteria('aic') - 2 * 6
    bic = res.info_criteria('bic') - 6 * np.log(res.nobs_effective)
    assert_allclose(aic, true['estat_aic'])
    assert_allclose(bic, true['estat_bic'])
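The adjustments above all reduce to the standard -2*llf-plus-penalty definitions; a minimal sketch of recomputing the default ICs by hand (the match is up to how statsmodels counts estimated parameters in `df_model`):

import numpy as np
from statsmodels.tsa.statespace import sarimax

res = sarimax.SARIMAX(np.random.normal(size=50), order=(1, 0, 0)).smooth([0.5, 1.0])
k = len(res.params)
print(res.aic, -2 * res.llf + 2 * k)                           # should agree
print(res.bic, -2 * res.llf + k * np.log(res.nobs_effective))  # should agree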
Exemple #25
def test_structural():
    # Clear warnings
    structural.__warningregistry__ = {}

    np.random.seed(38947)
    nobs = 100
    eps = np.random.normal(size=nobs)
    exog = np.random.normal(size=nobs)

    eps1 = np.zeros(nobs)
    eps2 = np.zeros(nobs)
    eps2[49] = 1
    eps3 = np.zeros(nobs)
    eps3[50:] = 1

    # AR(1)
    mod1 = structural.UnobservedComponents([0], autoregressive=1)
    mod2 = sarimax.SARIMAX([0], order=(1, 0, 0))
    actual = mod1.simulate([1, 0.5],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # ARX(1)
    mod1 = structural.UnobservedComponents(np.zeros(nobs),
                                           exog=exog,
                                           autoregressive=1)
    mod2 = sarimax.SARIMAX(np.zeros(nobs), exog=exog, order=(1, 0, 0))
    actual = mod1.simulate([1, 0.5, 0.2],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod2.k_states))
    desired = mod2.simulate([0.2, 0.5, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # Irregular
    mod = structural.UnobservedComponents([0], 'irregular')
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, eps)

    # Fixed intercept
    # (in practice this is a deterministic constant, because an irregular
    #  component must be added)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod = structural.UnobservedComponents([0], 'fixed intercept')
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          initial_state=[10])
    assert_allclose(actual, 10 + eps)

    # Deterministic constant
    mod = structural.UnobservedComponents([0], 'deterministic constant')
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          initial_state=[10])
    assert_allclose(actual, 10 + eps)

    # Local level
    mod = structural.UnobservedComponents([0], 'local level')
    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, eps + eps3)

    # Random walk
    mod = structural.UnobservedComponents([0], 'random walk')
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, eps + eps3)

    # Fixed slope
    # (in practice this is a deterministic trend, because an irregular
    #  component must be added)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod = structural.UnobservedComponents([0], 'fixed slope')
    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=[0, 1])
    assert_allclose(actual, eps + np.arange(100))

    # Deterministic trend
    mod = structural.UnobservedComponents([0], 'deterministic trend')
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=[0, 1])
    assert_allclose(actual, eps + np.arange(100))

    # Local linear deterministic trend
    mod = structural.UnobservedComponents([0],
                                          'local linear deterministic trend')
    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=[0, 1])
    desired = eps + np.r_[np.arange(50), 1 + np.arange(50, 100)]
    assert_allclose(actual, desired)

    # Random walk with drift
    mod = structural.UnobservedComponents([0], 'random walk with drift')
    actual = mod.simulate([1.], nobs, state_shocks=eps2, initial_state=[0, 1])
    desired = np.r_[np.arange(50), 1 + np.arange(50, 100)]
    assert_allclose(actual, desired)

    # Local linear trend
    mod = structural.UnobservedComponents([0], 'local linear trend')
    actual = mod.simulate([1., 1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=np.c_[eps2, eps1],
                          initial_state=[0, 1])
    desired = eps + np.r_[np.arange(50), 1 + np.arange(50, 100)]
    assert_allclose(actual, desired)

    actual = mod.simulate([1., 1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=np.c_[eps1, eps2],
                          initial_state=[0, 1])
    desired = eps + np.r_[np.arange(50), np.arange(50, 150, 2)]
    assert_allclose(actual, desired)

    # Smooth trend
    mod = structural.UnobservedComponents([0], 'smooth trend')
    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps1,
                          initial_state=[0, 1])
    desired = eps + np.r_[np.arange(100)]
    assert_allclose(actual, desired)

    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=[0, 1])
    desired = eps + np.r_[np.arange(50), np.arange(50, 150, 2)]
    assert_allclose(actual, desired)

    # Random trend
    mod = structural.UnobservedComponents([0], 'random trend')
    actual = mod.simulate([1., 1.],
                          nobs,
                          state_shocks=eps1,
                          initial_state=[0, 1])
    desired = np.r_[np.arange(100)]
    assert_allclose(actual, desired)

    actual = mod.simulate([1., 1.],
                          nobs,
                          state_shocks=eps2,
                          initial_state=[0, 1])
    desired = np.r_[np.arange(50), np.arange(50, 150, 2)]
    assert_allclose(actual, desired)

    # Seasonal (deterministic)
    mod = structural.UnobservedComponents([0],
                                          'irregular',
                                          seasonal=2,
                                          stochastic_seasonal=False)
    actual = mod.simulate([1.],
                          nobs,
                          measurement_shocks=eps,
                          initial_state=[10])
    desired = eps + np.tile([10, -10], 50)
    assert_allclose(actual, desired)

    # Seasonal (stochastic)
    mod = structural.UnobservedComponents([0], 'irregular', seasonal=2)
    actual = mod.simulate([1., 1.],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=eps2,
                          initial_state=[10])
    desired = eps + np.r_[np.tile([10, -10], 25), np.tile([11, -11], 25)]
    assert_allclose(actual, desired)

    # Cycle (deterministic)
    mod = structural.UnobservedComponents([0], 'irregular', cycle=True)
    actual = mod.simulate([1., 1.2],
                          nobs,
                          measurement_shocks=eps,
                          initial_state=[1, 0])
    x1 = [np.cos(1.2), np.sin(1.2)]
    x2 = [-np.sin(1.2), np.cos(1.2)]
    T = np.array([x1, x2])
    desired = eps.copy()  # copy so the in-place loop below does not mutate eps
    states = [1, 0]
    for i in range(nobs):
        desired[i] += states[0]
        states = np.dot(T, states)
    assert_allclose(actual, desired)

    # Cycle (stochastic)
    mod = structural.UnobservedComponents([0],
                                          'irregular',
                                          cycle=True,
                                          stochastic_cycle=True)
    actual = mod.simulate([1., 1., 1.2],
                          nobs,
                          measurement_shocks=eps,
                          state_shocks=np.c_[eps2, eps2],
                          initial_state=[1, 0])
    x1 = [np.cos(1.2), np.sin(1.2)]
    x2 = [-np.sin(1.2), np.cos(1.2)]
    T = np.array([x1, x2])
    desired = eps.copy()  # copy so the in-place loop below does not mutate eps
    states = [1, 0]
    for i in range(nobs):
        desired[i] += states[0]
        states = np.dot(T, states) + eps2[i]
    assert_allclose(actual, desired)
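All of the simulations above pin the parameters; in practice the same components are estimated from data. A minimal sketch of fitting a local level model (simulated data, not a real series):

import numpy as np
from statsmodels.tsa.statespace import structural

y = np.cumsum(np.random.normal(size=200)) + np.random.normal(size=200)
mod = structural.UnobservedComponents(y, 'local level')
res = mod.fit(disp=0)
print(res.summary())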
Exemple #26
def test_varmax():
    np.random.seed(371934)
    nobs = 100
    eps = np.random.normal(size=nobs)
    exog = np.random.normal(size=(nobs, 1))

    eps1 = np.zeros(nobs)
    eps2 = np.zeros(nobs)
    eps2[49] = 1
    eps3 = np.zeros(nobs)
    eps3[50:] = 1

    # VAR(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 0))
    actual = mod1.simulate([0.5, 0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VMA(2) - single series
    mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(0, 0, 2))
    actual = mod1.simulate([0.5, 0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VARMA(2, 2) - single series
    warning = EstimationWarning
    match = r'VARMA\(p,q\) models is not'
    with pytest.warns(warning, match=match):
        mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='n')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2))
    actual = mod1.simulate([0.5, 0.2, 0.1, -0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([0.5, 0.2, 0.1, -0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VARMA(2, 2) + trend - single series
    warning = EstimationWarning
    match = r'VARMA\(p,q\) models is not'
    with pytest.warns(warning, match=match):
        mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c')
    mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c')
    actual = mod1.simulate([10, 0.5, 0.2, 0.1, -0.2, 1],
                           nobs,
                           state_shocks=eps,
                           initial_state=np.zeros(mod1.k_states))
    desired = mod2.simulate([10, 0.5, 0.2, 0.1, -0.2, 1],
                            nobs,
                            state_shocks=eps,
                            initial_state=np.zeros(mod2.k_states))
    assert_allclose(actual, desired)

    # VAR(1)
    transition = np.array([[0.5, 0.1], [-0.1, 0.2]])

    mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='n')
    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, 0)

    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=[1, 1])
    desired = np.zeros((nobs, 2))
    state = np.r_[1, 1]
    for i in range(nobs):
        desired[i] = state
        state = np.dot(transition, state)
    assert_allclose(actual, desired)

    # VAR(1) + measurement error
    mod = varmax.VARMAX([[0, 0]],
                        order=(1, 0),
                        trend='n',
                        measurement_error=True)
    actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1., 1., 1.],
                          nobs,
                          measurement_shocks=np.c_[eps, eps],
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=np.zeros(mod.k_states))
    assert_allclose(actual, np.c_[eps, eps])

    # VARX(1)
    mod = varmax.VARMAX(np.zeros((nobs, 2)),
                        order=(1, 0),
                        trend='n',
                        exog=exog)
    actual = mod.simulate(np.r_[transition.ravel(), 5, -2, 1., 0, 1.],
                          nobs,
                          state_shocks=np.c_[eps1, eps1],
                          initial_state=[1, 1])
    desired = np.zeros((nobs, 2))
    state = np.r_[1, 1]
    for i in range(nobs):
        desired[i] = state
        if i < nobs - 1:
            state = exog[i + 1] * [5, -2] + np.dot(transition, state)
    assert_allclose(actual, desired)

    # VMA(1)
    # TODO: This is just a smoke test
    mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)),
                        order=(0, 1),
                        trend='n')
    mod.simulate(mod.start_params, nobs)

    # VARMA(2, 2) + trend + exog
    # TODO: This is just a smoke test
    warning = EstimationWarning
    match = r"VARMA\(p,q\) models is not"
    with pytest.warns(warning, match=match):
        mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)),
                            order=(2, 2),
                            trend='c',
                            exog=exog)
    mod.simulate(mod.start_params, nobs)
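Every comparison above passes explicit shock paths so the output is deterministic. Left to its defaults, `simulate` draws its own Gaussian shocks, so the path changes run to run unless the global seed is fixed. A minimal sketch:

import numpy as np
from statsmodels.tsa.statespace import varmax

endog = np.random.normal(size=(100, 2))
mod = varmax.VARMAX(endog, order=(1, 0), trend='n')
np.random.seed(0)
sim = mod.simulate(mod.start_params, 100)  # shape (100, 2), random shocks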
Exemple #27
def test_arma_direct():
    # Tests of an ARMA model simulation against direct construction
    # This is useful for e.g. trend components
    # Note: the first elements of the generated SARIMAX datasets are based on
    # the initial state, so we don't include them in the comparisons
    np.random.seed(10239)
    nobs = 100
    eps = np.random.normal(size=nobs)
    exog = np.random.normal(size=nobs)

    # AR(1)
    mod = sarimax.SARIMAX([0], order=(1, 0, 0))
    actual = mod.simulate([0.5, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        if i == 0:
            desired[i] = eps[i]
        else:
            desired[i] = 0.5 * desired[i - 1] + eps[i]
    assert_allclose(actual[1:], desired)

    # MA(1)
    mod = sarimax.SARIMAX([0], order=(0, 0, 1))
    actual = mod.simulate([0.5, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        if i == 0:
            desired[i] = eps[i]
        else:
            desired[i] = 0.5 * eps[i - 1] + eps[i]
    assert_allclose(actual[1:], desired)

    # ARMA(1, 1)
    mod = sarimax.SARIMAX([0], order=(1, 0, 1))
    actual = mod.simulate([0.5, 0.2, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        if i == 0:
            desired[i] = eps[i]
        else:
            desired[i] = 0.5 * desired[i - 1] + 0.2 * eps[i - 1] + eps[i]
    assert_allclose(actual[1:], desired)

    # ARMA(1, 1) + intercept
    mod = sarimax.SARIMAX([0], order=(1, 0, 1), trend='c')
    actual = mod.simulate([1.3, 0.5, 0.2, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        trend = 1.3
        if i == 0:
            desired[i] = trend + eps[i]
        else:
            desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] +
                          eps[i])
    assert_allclose(actual[1:], desired)

    # ARMA(1, 1) + intercept + time trend
    # Note: to allow time-varying SARIMAX to simulate 101 observations, need to
    # give it 101 observations up front
    mod = sarimax.SARIMAX(np.zeros(nobs + 1), order=(1, 0, 1), trend='ct')
    actual = mod.simulate([1.3, 0.2, 0.5, 0.2, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        trend = 1.3 + 0.2 * (i + 1)
        if i == 0:
            desired[i] = trend + eps[i]
        else:
            desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] +
                          eps[i])
    assert_allclose(actual[1:], desired)

    # ARMA(1, 1) + intercept + time trend + exog
    # Note: to allow time-varying SARIMAX to simulate 101 observations, need to
    # give it 101 observations up front
    # Note: the model is regression with SARIMAX errors, so the exog is
    # introduced into the observation equation rather than the ARMA part
    mod = sarimax.SARIMAX(np.zeros(nobs + 1),
                          exog=np.r_[0, exog],
                          order=(1, 0, 1),
                          trend='ct')
    actual = mod.simulate([1.3, 0.2, -0.5, 0.5, 0.2, 1.],
                          nobs + 1,
                          state_shocks=np.r_[eps, 0],
                          initial_state=np.zeros(mod.k_states))
    desired = np.zeros(nobs)
    for i in range(nobs):
        trend = 1.3 + 0.2 * (i + 1)
        if i == 0:
            desired[i] = trend + eps[i]
        else:
            desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] +
                          eps[i])
    desired = desired - 0.5 * exog
    assert_allclose(actual[1:], desired)
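The hand-rolled recursions above can also be cross-checked against `arma_generate_sample`, which simulates from the lag-polynomial representation; note the sign convention, where the AR coefficient enters negated. A minimal sketch:

import numpy as np
from statsmodels.tsa.arima_process import arma_generate_sample

ar = np.r_[1, -0.5]  # AR polynomial 1 - 0.5 L
ma = np.r_[1, 0.2]   # MA polynomial 1 + 0.2 L
y = arma_generate_sample(ar, ma, nsample=100)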
Exemple #28
    def test_seasonal_arima(self):

        model_combin = [[(3, 1, 1), (3, 1, 1, 12), 't', True, True, False,
                         True, False, False, True, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', True, True, False,
                         False, False, False, False, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', True, False, True,
                         True, False, False, True, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', True, False, True,
                         False, False, False, False, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', False, True, False,
                         True, False, False, True, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', False, True, False,
                         False, False, False, False, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', False, False, True,
                         True, False, False, True, False],
                        [(3, 1, 1), (3, 1, 1, 12), 't', False, False, True,
                         False, False, False, False, False]]

        # monthly airline passenger counts (the classic AirPassengers series)
        data = [
            112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115,
            126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, 145, 150,
            178, 163, 172, 178, 199, 199, 184, 162, 146, 166, 171, 180, 193,
            181, 183, 218, 230, 242, 209, 191, 172, 194, 196, 196, 236, 235,
            229, 243, 264, 272, 237, 211, 180, 201, 204, 188, 235, 227, 234,
            264, 302, 293, 259, 229, 203, 229, 242, 233, 267, 269, 270, 315,
            364, 347, 312, 274, 237, 278, 284, 277, 317, 313, 318, 374, 413,
            405, 355, 306, 271, 306, 315, 301, 356, 348, 355, 422, 465, 467,
            404, 347, 305, 336, 340, 318, 362, 348, 363, 435, 491, 505, 404,
            359, 310, 337, 360, 342, 406, 396, 420, 472, 548, 559, 463, 407,
            362, 405, 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390,
            432
        ]

        index = pd.date_range(start='1949-01-01',
                              end='1960-12-01',
                              freq='MS')
        ts_data = pd.Series(data, index)
        ts_data.index.name = 'datetime_index'
        ts_data.name = 'n_passengers'

        c = 0
        for x in model_combin:
            try:
                model = sarimax.SARIMAX(endog=ts_data,
                                        exog=None,
                                        order=x[0],
                                        seasonal_order=x[1],
                                        trend=x[2],
                                        measurement_error=x[3],
                                        time_varying_regression=x[4],
                                        mle_regression=x[5],
                                        simple_differencing=x[6],
                                        enforce_stationarity=x[7],
                                        enforce_invertibility=x[8],
                                        hamilton_representation=x[9],
                                        concentrate_scale=x[10])

                result = model.fit()
                try:
                    c = c + 1
                    file_name = 'seasonal_arima' + str(c) + '.pmml'
                    ArimaToPMML(ts_data, model, result, file_name)
                except Exception:
                    continue
                # only assert on the export once ArimaToPMML has succeeded
                exported = os.path.isfile(file_name)
                self.assertTrue(exported)
            except Exception:
                continue
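The loop above silently skips any combination that fails to fit or export. A minimal sketch of the single-model path it wraps, reusing `ts_data` from the test (and assuming nyoka's four-argument `ArimaToPMML` form used above, whose signature has varied across nyoka releases):

model = sarimax.SARIMAX(endog=ts_data,
                        order=(3, 1, 1),
                        seasonal_order=(3, 1, 1, 12),
                        trend='t')
result = model.fit()
ArimaToPMML(ts_data, model, result, 'seasonal_arima.pmml')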
Exemple #29
def test_impulse_responses():
    # Test for impulse response functions

    # Random walk: 1-unit impulse response (i.e. non-orthogonalized irf) is 1
    # for all periods
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    mod['state_cov', 0, 0] = 2.

    actual = mod.impulse_responses(steps=10)
    desired = np.ones((11, 1))

    assert_allclose(actual, desired)

    # Random walk: 2-unit impulse response (i.e. non-orthogonalized irf) is 2
    # for all periods
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    mod['state_cov', 0, 0] = 2.

    actual = mod.impulse_responses(steps=10, impulse=[2])
    desired = np.ones((11, 1)) * 2

    assert_allclose(actual, desired)

    # Random walk: 1-standard-deviation response (i.e. orthogonalized irf) is
    # sigma for all periods (here sigma^2 = 2)
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    mod['state_cov', 0, 0] = 2.

    actual = mod.impulse_responses(steps=10, orthogonalized=True)
    desired = np.ones((11, 1)) * 2**0.5

    assert_allclose(actual, desired)

    # Random walk: 1-standard-deviation cumulative response (i.e. cumulative
    # orthogonalized irf)
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['design', 0, 0] = 1.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    mod['state_cov', 0, 0] = 2.

    actual = mod.impulse_responses(steps=10,
                                   orthogonalized=True,
                                   cumulative=True)
    desired = np.cumsum(np.ones((11, 1)) * 2**0.5)[:, np.newaxis]
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10,
                                   impulse=[1],
                                   orthogonalized=True,
                                   cumulative=True)
    desired = np.cumsum(np.ones((11, 1)) * 2**0.5)[:, np.newaxis]

    assert_allclose(actual, desired)

    # Random walk: 1-unit impulse response (i.e. non-orthogonalized irf) is 1
    # for all periods, even when intercepts are present
    mod = KalmanFilter(k_endog=1, k_states=1)
    mod['state_intercept', 0] = 100.
    mod['design', 0, 0] = 1.
    mod['obs_intercept', 0] = -1000.
    mod['transition', 0, 0] = 1.
    mod['selection', 0, 0] = 1.
    mod['state_cov', 0, 0] = 2.

    actual = mod.impulse_responses(steps=10)
    desired = np.ones((11, 1))

    assert_allclose(actual, desired)

    # Univariate model (random walk): test that an error is thrown when
    # a multivariate or empty "impulse" is sent
    mod = KalmanFilter(k_endog=1, k_states=1)
    assert_raises(ValueError, mod.impulse_responses, impulse=1)
    assert_raises(ValueError, mod.impulse_responses, impulse=[1, 1])
    assert_raises(ValueError, mod.impulse_responses, impulse=[])

    # Univariate model with two uncorrelated shocks
    mod = KalmanFilter(k_endog=1, k_states=2)
    mod['design', 0, 0:2] = 1.
    mod['transition', :, :] = np.eye(2)
    mod['selection', :, :] = np.eye(2)
    mod['state_cov', :, :] = np.eye(2)

    desired = np.ones((11, 1))

    actual = mod.impulse_responses(steps=10, impulse=0)
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10, impulse=[1, 0])
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10, impulse=1)
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10, impulse=[0, 1])
    assert_allclose(actual, desired)

    # In this case (with sigma=sigma^2=1), orthogonalized is the same as not
    actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True)
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10,
                                   impulse=[1, 0],
                                   orthogonalized=True)
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10,
                                   impulse=[0, 1],
                                   orthogonalized=True)
    assert_allclose(actual, desired)

    # Univariate model with two correlated shocks
    mod = KalmanFilter(k_endog=1, k_states=2)
    mod['design', 0, 0:2] = 1.
    mod['transition', :, :] = np.eye(2)
    mod['selection', :, :] = np.eye(2)
    mod['state_cov', :, :] = np.array([[1, 0.5], [0.5, 1.25]])

    desired = np.ones((11, 1))

    # Non-orthogonalized (i.e. 1-unit) impulses still just generate 1's
    actual = mod.impulse_responses(steps=10, impulse=0)
    assert_allclose(actual, desired)

    actual = mod.impulse_responses(steps=10, impulse=1)
    assert_allclose(actual, desired)

    # Orthogonalized (i.e. 1-std-dev) impulses now generate different responses
    actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True)
    assert_allclose(actual, desired + desired * 0.5)

    actual = mod.impulse_responses(steps=10, impulse=1, orthogonalized=True)
    assert_allclose(actual, desired)

    # Multivariate model with two correlated shocks
    mod = KalmanFilter(k_endog=2, k_states=2)
    mod['design', :, :] = np.eye(2)
    mod['transition', :, :] = np.eye(2)
    mod['selection', :, :] = np.eye(2)
    mod['state_cov', :, :] = np.array([[1, 0.5], [0.5, 1.25]])

    ones = np.ones((11, 1))
    zeros = np.zeros((11, 1))

    # Non-orthogonalized (i.e. 1-unit) impulses still just generate 1's, but
    # only for the appropriate series
    actual = mod.impulse_responses(steps=10, impulse=0)
    assert_allclose(actual, np.c_[ones, zeros])

    actual = mod.impulse_responses(steps=10, impulse=1)
    assert_allclose(actual, np.c_[zeros, ones])

    # Orthogonalized (i.e. 1-std-dev) impulses now generate different
    # responses, and only for the appropriate series
    actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True)
    assert_allclose(actual, np.c_[ones, ones * 0.5])

    actual = mod.impulse_responses(steps=10, impulse=1, orthogonalized=True)
    assert_allclose(actual, np.c_[zeros, ones])

    # AR(1) model generates a geometrically declining series
    mod = sarimax.SARIMAX([0.1, 0.5, -0.2], order=(1, 0, 0))
    phi = 0.5
    mod.update([phi, 1])

    desired = np.cumprod(np.r_[1, [phi] * 10])

    # Test going through the model directly
    actual = mod.ssm.impulse_responses(steps=10)
    assert_allclose(actual[:, 0], desired)

    # Test going through the results object
    res = mod.filter([phi, 1.])
    actual = res.impulse_responses(steps=10)
    assert_allclose(actual, desired)
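The geometric decay checked at the end is the AR(1) special case of the MA(infinity) psi-weights, which `arma2ma` computes directly. A minimal sketch:

import numpy as np
from statsmodels.tsa.arima_process import arma2ma

psi = arma2ma(np.r_[1, -0.5], np.r_[1], lags=11)
print(psi)  # 1, 0.5, 0.25, ... == np.cumprod(np.r_[1, [0.5] * 10])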
Exemple #30
                                         seasonality=[365, 30, 7])
plt.plot(multi_adj_series)

plt.hist(multi_adj_series)


def make_date_range(start_date, end_date, date_format='%Y-%m-%d', increment=1):
    start = datetime.datetime.strptime(start_date, date_format)
    end = datetime.datetime.strptime(end_date, date_format)
    delta = datetime.timedelta(days=increment)
    date_range = []
    while start < end:
        date_range.append(start.date())
        start += delta
    return date_range


ts_df = pd.DataFrame({'value': tseries},
                     index=make_date_range('2015-01-01', '2019-12-31'))

define_sarima = sarimax.SARIMAX(ts_df,
                                order=(1, 0, 0),
                                seasonal_order=(0, 1, 0, 1),
                                freq='D')

fit_sarima = define_sarima.fit()
print(fit_sarima.summary())
prediction_range = make_date_range('2020-01-01', '2024-12-31')
pred = fit_sarima.predict(start=prediction_range[0], end=prediction_range[-1])
plt.plot(pred)
plt.show()
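`predict` returns only point predictions; `get_forecast` also exposes interval estimates. A minimal sketch, reusing the fitted model above:

fc = fit_sarima.get_forecast(steps=365)
plt.plot(fc.predicted_mean)
ci = fc.conf_int()  # DataFrame with lower/upper columns
plt.fill_between(ci.index, ci.iloc[:, 0], ci.iloc[:, 1], alpha=0.2)
plt.show()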