Beispiel #1
0
def test_smoke(data):
    """Fit an STL + ARIMA(2, 0, 0) forecaster and check the result's surface.

    The fitted result must be able to forecast, produce a text summary and
    expose the STL object, the decomposition, the model and a model result
    that offers ``forecast``.
    """
    forecaster = STLForecast(data, ARIMA, model_kwargs={"order": (2, 0, 0)})
    fitted = forecaster.fit(fit_kwargs={})

    # Only checks that forecasting runs; the values are not inspected.
    fitted.forecast(37)

    summary_text = fitted.summary().as_text()
    assert isinstance(summary_text, str)

    expected_types = (
        ("stl", STL),
        ("result", DecomposeResult),
        ("model", ARIMA),
    )
    for attribute, expected in expected_types:
        assert isinstance(getattr(fitted, attribute), expected)

    assert hasattr(fitted.model_result, "forecast")
Beispiel #2
0
def test_get_prediction(sunspots):
    """Check the shapes returned by ``get_prediction`` (see GH7309).

    Both the predicted mean and its variance are expected to hold 309
    entries for the ``sunspots`` fixture.
    """
    # GH7309
    arima_options = {"order": (2, 2, 0)}
    forecaster = STLForecast(
        sunspots, model=ARIMA, model_kwargs=arima_options, period=11
    )
    prediction = forecaster.fit().get_prediction()

    expected_shape = (309, )
    assert prediction.predicted_mean.shape == expected_shape
    assert prediction.var_pred_mean.shape == expected_shape
Beispiel #3
0
def test_get_prediction():
    """Check the shapes returned by ``get_prediction`` (see GH7309).

    The first column of the sunspots dataset is re-indexed with a plain
    integer range before being passed to ``STLForecast``.
    """
    # GH7309
    frame = sunspots.load_pandas().data
    frame.index = np.arange(frame.shape[0])
    series = frame.iloc[:, 0]

    forecaster = STLForecast(
        series,
        model=ARIMA,
        model_kwargs={"order": (2, 2, 0)},
        period=11,
    )
    prediction = forecaster.fit().get_prediction()

    expected_shape = (309,)
    assert prediction.predicted_mean.shape == expected_shape
    assert prediction.var_pred_mean.shape == expected_shape
Beispiel #4
0
def sarima_forecast(df,
                    config,
                    weektopredict=1,
                    decomposition=False,
                    box_cox=0,
                    rmv_outliers=True):
    """Forecast the ``vendite`` column with SARIMAX and append the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds ISO-formatted week strings and which has a
        ``vendite`` column. It is extended in place with the forecasts.
    config : iterable
        ``(order, seasonal_order, trend)`` passed on to SARIMAX.
    weektopredict : int
        Number of steps to forecast and append.
    decomposition : bool
        When true, SARIMAX is wrapped in ``STLForecast`` with ``period=26``.
    box_cox : number
        When non-zero, the data goes through ``box_cox_transformation``
        before fitting and the forecast is transformed back.
    rmv_outliers : bool
        When true, ``remove_outliers`` is applied before fitting.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with one new row per forecast step.
    """
    # Weekly period index built from the ISO strings of the original index.
    parsed_weeks = [dateutil.parser.isoparse(label) for label in df.index]
    dateiso = pd.DatetimeIndex(parsed_weeks).to_period('W')

    prepared = remove_outliers(df) if rmv_outliers else df.copy()
    if box_cox != 0:
        prepared = box_cox_transformation(prepared.copy(), box_cox)
    series = pd.Series(data=prepared['vendite'].values, index=dateiso)

    settings = list(config)
    order, sorder, trend = settings[0], settings[1], settings[2]

    if decomposition:
        sarimax_options = dict(order=order,
                               seasonal_order=sorder,
                               trend=trend)
        model = STLForecast(series,
                            SARIMAX,
                            period=26,
                            model_kwargs=sarimax_options)
        model_fit = model.fit(fit_kwargs=dict(disp=False))
    else:
        model = SARIMAX(series,
                        order=order,
                        seasonal_order=sorder,
                        trend=trend)
        model_fit = model.fit(disp=False)

    predict = model_fit.forecast(weektopredict)
    if box_cox != 0:
        # Undo the transformation applied before fitting.
        predict = box_cox_transformation(predict, box_cox, reverse=True)

    # Append the forecasts, advancing one week past the last label each step.
    week = df.index[-1]
    for step in range(weektopredict):
        week = add_week(week, 1)
        df.loc[week] = predict.iloc[step]
    return df
Beispiel #5
0
def make_out_of_sample_df(past_data, model, periods=24 * 30):
    """
    Build a dataframe of future regressor values using STL and ARIMA.

    Likely does not run very efficiently: one STL + ARIMA model is fitted
    per continuous regressor.
    Reference: https://www.statsmodels.org/stable/examples/notebooks/generated/stl_decomposition.html#Forecasting-with-STL
    ----------
    Input
        - past_data: data used in training the Prophet model. Must contain
          the columns ``ds``, ``y`` and every continuous regressor listed in
          the body.
        - model: trained model of the Prophet class; only its
          ``make_future_dataframe`` method is used.
        - periods: number of hourly steps to generate (default 24 * 30).

    Returns
        - A pandas dataframe with predicted values for regressors to use for
          forecasting: the future ``ds`` values, a ``workingday`` dummy and
          one column per continuous regressor.
    """
    future_time_df = model.make_future_dataframe(periods=periods,
                                                 freq="H",
                                                 include_history=False)

    # Working-day dummy: 0 on Canadian holidays and on weekends, 1 otherwise.
    # ``weekday()`` must be *called*; it returns 0 (Monday) .. 6 (Sunday), so
    # Saturday and Sunday are the values >= 5. Comparing the bound method to
    # day names never matched, which marked every weekend as a working day.
    canada_holidays = holidays.CA()
    future_time_df["workingday"] = [
        0 if ts.date() in canada_holidays or ts.weekday() >= 5 else 1
        for ts in future_time_df["ds"]
    ]

    # Create predictions for future values of continuous regressors
    non_binary_regressors = [
        "Calgary_temp.1_hour_lag", "Edmonton_temp.1_hour_lag",
        "FortMM_temp.1_hour_lag", "Lethbridge_temp.1_hour_lag", "future 1",
        "WTI spot"
    ]
    past_data_copy = past_data.drop(columns=["y"]).set_index("ds")
    # The time-series models need an explicit frequency on the index.
    past_data_copy.index.freq = "H"
    for var in non_binary_regressors:
        var_forecast = STLForecast(past_data_copy[var],
                                   ARIMA,
                                   model_kwargs=dict(order=(1, 1, 0),
                                                     trend="t")).fit()
        # NOTE(review): if the forecast comes back as a date-indexed Series
        # while ``future_time_df`` has a positional index, this assignment
        # aligns on index labels and may produce NaN - confirm.
        future_time_df[var] = var_forecast.forecast(periods)
    future_time_df.reset_index(inplace=True)
    return future_time_df
Beispiel #6
0
def seasonalExp_smoothing(df,
                          week_to_predict=1,
                          decompositon=False,
                          box_cox=0,
                          rmv_outliers=True):
    """Forecast ``vendite`` with seasonal exponential smoothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds ISO-formatted week strings and which has a
        ``vendite`` column. It is extended in place with the forecasts.
    week_to_predict : int
        Number of steps to forecast and append.
    decompositon : bool
        When true, the smoother is wrapped in ``STLForecast`` with
        ``period=26``.
    box_cox : number
        When non-zero, the data goes through ``box_cox_transformation``
        before fitting and the forecast is transformed back.
    rmv_outliers : bool
        When true, ``remove_outliers`` is applied before fitting.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with one new row per forecast step.
    """
    # Weekly period index built from the ISO strings of the original index.
    parsed_weeks = [dateutil.parser.isoparse(label) for label in df.index]
    dateiso = pd.DatetimeIndex(parsed_weeks).to_period('W')

    prepared = remove_outliers(df) if rmv_outliers else df.copy()
    if box_cox != 0:
        prepared = box_cox_transformation(prepared.copy(), box_cox)
    series = pd.Series(data=prepared['vendite'].values, index=dateiso)

    if decompositon:
        smoothing_options = dict(seasonal_periods=26,
                                 seasonal='add',
                                 initialization_method="estimated")
        model = STLForecast(series,
                            ExponentialSmoothing,
                            period=26,
                            model_kwargs=smoothing_options)
    else:
        model = ExponentialSmoothing(series,
                                     seasonal_periods=26,
                                     seasonal='add',
                                     initialization_method='estimated')
    # Both model flavours are fitted with default arguments.
    model_fitted = model.fit()

    predict = model_fitted.forecast(week_to_predict)
    if box_cox != 0:
        # Undo the transformation applied before fitting.
        predict = box_cox_transformation(predict, box_cox, reverse=True)

    # Append the forecasts, advancing one week past the last label each step.
    week = df.index[-1]
    for step in range(week_to_predict):
        week = add_week(week, 1)
        df.loc[week] = predict.iloc[step]
    return df
Beispiel #7
0
def test_no_var_pred(sunspots, not_implemented):
    """Check ``get_prediction`` when the model offers no prediction variance.

    The wrapped model's prediction object either has no ``var_pred_mean``
    attribute at all, or (when ``not_implemented`` is true) has one that
    raises ``NotImplementedError`` on access. In both cases a
    ``UserWarning`` is expected and the returned variance must be all NaN.

    NOTE(review): the ``not_implemented`` case assumes ``STLForecast`` treats
    ``NotImplementedError`` like a missing attribute - confirm against the
    library implementation.
    """
    class DummyPred:
        def __init__(self, predicted_mean, row_labels):
            self.predicted_mean = predicted_mean
            self.row_labels = row_labels

        if not_implemented:
            # A property only works as a descriptor when it lives on the
            # class. Assigning ``property(f)`` to an instance attribute just
            # stores an inert object, so the raising path was never taken.
            @property
            def var_pred_mean(self):
                raise NotImplementedError

    class DummyRes:
        def __init__(self, res):
            self._res = res

        def forecast(self, *args, **kwargs):
            return self._res.forecast(*args, **kwargs)

        def get_prediction(self, *args, **kwargs):
            pred = self._res.get_prediction(*args, **kwargs)

            # Strip everything except the mean and the row labels.
            return DummyPred(pred.predicted_mean, pred.row_labels)

    class DummyMod:
        def __init__(self, y):
            self._mod = ARIMA(y)

        def fit(self, *args, **kwargs):
            res = self._mod.fit(*args, **kwargs)
            return DummyRes(res)

    stl_mod = STLForecast(sunspots, model=DummyMod, period=11)
    stl_res = stl_mod.fit()
    with pytest.warns(UserWarning, match="The variance of"):
        pred = stl_res.get_prediction()
    assert np.all(np.isnan(pred.var_pred_mean))
Beispiel #8
0
def test_exceptions(data):
    """Check the errors raised for models that break the expected protocol.

    The test doubles are declared first, each missing one more piece than
    the checks below require; the checks then run from the least to the
    most complete double.
    """
    class BadModel:
        # Accepts anything, offers no ``fit``.
        def __init__(self, *args, **kwargs):
            pass

    class NoForecast(BadModel):
        # ``fit`` returns an object without ``forecast``.
        def fit(self, *args, **kwargs):
            return BadModel()

    class BadResult:
        # Offers ``forecast`` but no ``summary``.
        def forecast(self, *args, **kwargs):
            pass

    class FakeModel(BadModel):
        def fit(self, *args, **kwargs):
            return BadResult()

    class BadResultSummary(BadResult):
        # ``summary`` returns a plain object.
        def summary(self, *args, **kwargs):
            return object()

    class FakeModelSummary(BadModel):
        def fit(self, *args, **kwargs):
            return BadResultSummary()

    with pytest.raises(AttributeError, match="model must expose"):
        STLForecast(data, BadModel)

    with pytest.raises(AttributeError, match="The model's result"):
        STLForecast(data, NoForecast).fit()

    with pytest.raises(AttributeError, match="The model result does not"):
        fitted = STLForecast(data, FakeModel).fit()
        fitted.summary()

    with pytest.raises(TypeError, match="The model result's summary"):
        fitted = STLForecast(data, FakeModelSummary).fit()
        fitted.summary()
Beispiel #9
0
def test_equivalence_forecast(data, config, horizon):
    """Compare ``STLForecast`` with a hand-built STL + model forecast.

    The reference forecast is the model fitted on ``data`` minus the STL
    seasonal component, plus the last 12 seasonal values repeated over the
    horizon. When the model result supports ``get_prediction``, the
    out-of-sample prediction mean is compared too and several ``dynamic``
    variants are exercised.
    """
    model, kwargs = config

    decomposition = STL(data).fit()
    deseasonalized = data - decomposition.seasonal
    fit_kwarg = {}
    if model is ETSModel:
        fit_kwarg["disp"] = False
    res = model(deseasonalized, **kwargs).fit(**fit_kwarg)
    stlf = STLForecast(data, model,
                       model_kwargs=kwargs).fit(fit_kwargs=fit_kwarg)

    # Repeat the last 12 seasonal values often enough to cover the horizon.
    last_cycle = np.asarray(decomposition.seasonal)[-12:]
    repeats = 1 + horizon // 12
    tiled = np.tile(last_cycle, repeats)
    fcast = res.forecast(horizon) + tiled[:horizon]
    assert_allclose(stlf.forecast(horizon), fcast, rtol=1e-4)

    if not hasattr(res, "get_prediction"):
        return

    nobs = data.shape[0]
    end = nobs + horizon - 1
    pred = stlf.get_prediction(nobs, end)
    assert isinstance(pred, PredictionResults)
    assert_allclose(pred.predicted_mean, fcast, rtol=1e-4)

    # These calls only need to run without raising.
    half = nobs // 2
    stlf.get_prediction(half, end)
    for dynamic in (True, half // 2):
        stlf.get_prediction(half, end, dynamic=dynamic)

    if hasattr(data, "index"):
        # The same dynamic start given as string, datetime and index label
        # must produce matching means.
        loc = data.index[half + half // 2]
        variants = [loc.strftime("%Y-%m-%d"), loc.to_pydatetime(), loc]
        a, b, c = [
            stlf.get_prediction(half, end, dynamic=variant)
            for variant in variants
        ]
        assert_allclose(a.predicted_mean, b.predicted_mean, rtol=1e-4)
        assert_allclose(a.predicted_mean, c.predicted_mean, rtol=1e-4)
    print("The roots of denominators are: ", np.real(root_a[i]))

#%% ARIMA(1,1,1)

# Fit an ARIMA(p, d, q) = ARIMA(1, 1, 1) model through STLForecast and
# evaluate its forecast on the test set.
d = 1  # differencing order
p = 1  # autoregressive order
q = 1  # moving-average order
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.tsa.arima.model import ARIMA

# Give the training index an explicit frequency, taken from the one pandas
# infers from its values.
train.index.freq = train.index.inferred_freq

# Wrap ARIMA in STLForecast, passing the (p, d, q) order defined above.
stlf = STLForecast(train, ARIMA, model_kwargs=dict(order=(p, d, q)))
# fit the model
stlf_res = stlf.fit()
# Forecast as many steps as there are test observations.
forecast = stlf_res.forecast(len(test))
# Model summary; the returned object is neither printed nor stored here.
stlf_res.summary()

#%% ------------------------------------------- Testing set statistics -------------------------------------------------------
# Values read from the fitted model for the estimated variance; presumably
# the AR and MA lag counts, used further down - TODO confirm.
na = stlf_res.model.k_ar
nb = stlf_res.model.k_ma

# Forecast errors on the test set, with their sum and mean of squares.
errors = np.array(test) - np.array(forecast)
SSE_test = np.square(errors).sum()
MSE_test = np.square(errors).mean()
# seasonalities and then using a standard time-series model to forecast the
# trend and cyclical components.
#
# Here we use STL to handle the seasonality and then an ARIMA(1,1,0) to
# model the deseasonalized data. The seasonal component is forecast from the
# last full cycle where
#
# $$E[S_{T+h}|\mathcal{F}_T]=\hat{S}_{T-k}$$
#
# where $k= m - h + m \lfloor \frac{h-1}{m} \rfloor$. The forecast
# automatically adds the seasonal component forecast to the ARIMA forecast.

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.forecasting.stl import STLForecast

# Give the index an explicit frequency, taken from the one pandas infers
# from its values.
elec_equip.index.freq = elec_equip.index.inferred_freq
# STL handles the seasonality; ARIMA(1,1,0) with trend="t" models the
# deseasonalized data.
stlf = STLForecast(elec_equip,
                   ARIMA,
                   model_kwargs=dict(order=(1, 1, 0), trend="t"))
stlf_res = stlf.fit()

# Forecast 24 steps ahead and plot the forecast together with the series.
forecast = stlf_res.forecast(24)
plt.plot(elec_equip)
plt.plot(forecast)
plt.show()

# ``summary`` contains information about both the time-series model and
# the STL decomposition.

print(stlf_res.summary())