def test_smoke(data):
    """Smoke-test fitting, forecasting and summarising an STLForecast with an ARIMA(2, 0, 0)."""
    forecaster = STLForecast(data, ARIMA, model_kwargs={"order": (2, 0, 0)})
    fitted = forecaster.fit(fit_kwargs={})

    # The forecast values are not inspected; the call only has to succeed.
    fitted.forecast(37)

    summary_text = fitted.summary().as_text()
    assert isinstance(summary_text, str)

    # The result object must expose the decomposition and model pieces it was built from.
    assert isinstance(fitted.stl, STL)
    assert isinstance(fitted.result, DecomposeResult)
    assert isinstance(fitted.model, ARIMA)
    assert hasattr(fitted.model_result, "forecast")
def test_get_prediction(sunspots):
    """Default get_prediction yields 309 predicted means and 309 variances (GH7309)."""
    # NOTE(review): a second ``test_get_prediction`` is defined further down; if both
    # live in the same module the later definition shadows this one - confirm.
    arima_options = {"order": (2, 2, 0)}
    forecaster = STLForecast(
        sunspots, model=ARIMA, model_kwargs=arima_options, period=11
    )
    prediction = forecaster.fit().get_prediction()

    expected_shape = (309,)
    assert prediction.predicted_mean.shape == expected_shape
    assert prediction.var_pred_mean.shape == expected_shape
def test_get_prediction():
    """Default get_prediction yields 309 predicted means and 309 variances (GH7309)."""
    frame = sunspots.load_pandas().data
    # Replace the loaded index with a plain 0..n-1 integer index.
    frame.index = np.arange(frame.shape[0])
    endog = frame.iloc[:, 0]

    forecaster = STLForecast(
        endog,
        model=ARIMA,
        model_kwargs={"order": (2, 2, 0)},
        period=11,
    )
    prediction = forecaster.fit().get_prediction()

    expected_shape = (309,)
    assert prediction.predicted_mean.shape == expected_shape
    assert prediction.var_pred_mean.shape == expected_shape
def sarima_forecast(df, config, weektopredict=1, decomposition=False, box_cox=0, rmv_outliers=True):
    """Forecast the ``vendite`` column with SARIMAX and append the forecast weeks to ``df``.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``vendite`` column; every index label is parsed with
        ``dateutil.parser.isoparse``. The frame is modified in place.
    config : iterable
        Items 0, 1 and 2 are passed to SARIMAX as ``order``, ``seasonal_order``
        and ``trend`` respectively.
    weektopredict : int
        Number of forecast steps to append.
    decomposition : bool
        When true, SARIMAX is fitted through ``STLForecast`` with ``period=26``.
    box_cox : number
        Forwarded to ``box_cox_transformation``; ``0`` skips the transform
        (presumably the Box-Cox lambda - confirm against that helper).
    rmv_outliers : bool
        When true, the modelled data comes from ``remove_outliers(df)``.

    Returns
    -------
    DataFrame
        The same ``df`` object with one extra row per forecast week; every
        column of a new row is set to that week's forecast value.
    """
    weekly_index = pd.DatetimeIndex(
        [dateutil.parser.isoparse(label) for label in df.index]
    ).to_period('W')

    cleaned = remove_outliers(df) if rmv_outliers else df.copy()
    use_box_cox = box_cox != 0
    if use_box_cox:
        cleaned = box_cox_transformation(cleaned.copy(), box_cox)
    series = pd.Series(data=cleaned['vendite'].values, index=weekly_index)

    params = list(config)
    order, sorder, trend = params[0], params[1], params[2]

    if decomposition:
        stl_model = STLForecast(
            series,
            SARIMAX,
            period=26,
            model_kwargs={"order": order, "seasonal_order": sorder, "trend": trend},
        )
        fitted = stl_model.fit(fit_kwargs={"disp": False})
    else:
        plain_model = SARIMAX(series, order=order, seasonal_order=sorder, trend=trend)
        fitted = plain_model.fit(disp=False)

    predict = fitted.forecast(weektopredict)
    if use_box_cox:
        # Map the forecast back to the original scale.
        predict = box_cox_transformation(predict, box_cox, reverse=True)

    # Continue from the last label of the caller's frame, one week per forecast step.
    week = df.index[-1]
    for step in range(weektopredict):
        week = add_week(week, 1)
        df.loc[week] = predict.iloc[step]
    return df
def make_out_of_sample_df(past_data, model, periods=24 * 30):
    """
    Sample function to create a dataframe with predicted values for the
    regressors using STL and ARIMA.
    Likely does not run very efficiently.
    Reference: https://www.statsmodels.org/stable/examples/notebooks/generated/stl_decomposition.html#Forecasting-with-STL
    ----------
    Input
    - past_data: data used in training the Prophet model. Must contain a "ds"
      column, a "y" column and one column per continuous regressor listed
      in the body.
    - model: trained model of the Prophet class.
    - periods: number of hourly steps to generate (default 24 * 30).

    Returns
    - A pandas dataframe with one row per future hour, holding "ds", the
      "workingday" dummy and the forecast value of every continuous regressor.
    """
    future_time_df = model.make_future_dataframe(periods=periods,
                                                 freq="H",
                                                 include_history=False)

    # Working-day dummy: 0 on Canadian holidays and on weekends, 1 otherwise.
    canada_holidays = holidays.CA()
    is_holiday = future_time_df["ds"].map(
        lambda stamp: stamp.date() in canada_holidays).astype(bool)
    # weekday numbers Monday as 0, so 5 and 6 are Saturday and Sunday. The
    # previous check compared the uncalled ``weekday`` method with day names,
    # which is never true, so weekends were treated as working days.
    is_weekend = future_time_df["ds"].dt.weekday >= 5
    future_time_df["workingday"] = (~(is_holiday | is_weekend)).astype("int64")

    # Create predictions for future values of continuous regressors
    non_binary_regressors = [
        "Calgary_temp.1_hour_lag", "Edmonton_temp.1_hour_lag",
        "FortMM_temp.1_hour_lag", "Lethbridge_temp.1_hour_lag", "future 1",
        "WTI spot"
    ]

    past_data_copy = past_data.drop(columns=["y"]).set_index("ds")
    # STLForecast needs a regular frequency on the index to extend it.
    past_data_copy.index.freq = "H"

    for var in non_binary_regressors:
        var_forecast = STLForecast(past_data_copy[var],
                                   ARIMA,
                                   model_kwargs=dict(order=(1, 1, 0),
                                                     trend="t")).fit()
        # The forecast is a Series labelled by future timestamps, whereas
        # future_time_df is labelled positionally; assigning the Series
        # directly aligns on labels and fills the column with NaN. Both hold
        # `periods` consecutive hourly steps, so copy the values by position.
        future_time_df[var] = var_forecast.forecast(periods).to_numpy()

    # NOTE(review): future_time_df still carries its default index here, so
    # this adds an "index" column - kept for backward compatibility, confirm
    # it is wanted.
    future_time_df.reset_index(inplace=True)

    return future_time_df
def seasonalExp_smoothing(df, week_to_predict=1, decompositon=False, box_cox=0, rmv_outliers=True):
    """Forecast the ``vendite`` column with additive seasonal exponential smoothing.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``vendite`` column; every index label is parsed with
        ``dateutil.parser.isoparse``. The frame is modified in place.
    week_to_predict : int
        Number of forecast steps to append.
    decompositon : bool
        (Spelling kept for backward compatibility.) When true, the smoother is
        fitted through ``STLForecast`` with ``period=26``.
    box_cox : number
        Forwarded to ``box_cox_transformation``; ``0`` skips the transform
        (presumably the Box-Cox lambda - confirm against that helper).
    rmv_outliers : bool
        When true, the modelled data comes from ``remove_outliers(df)``.

    Returns
    -------
    DataFrame
        The same ``df`` object with one extra row per forecast week; every
        column of a new row is set to that week's forecast value.
    """
    weekly_index = pd.DatetimeIndex(
        [dateutil.parser.isoparse(label) for label in df.index]
    ).to_period('W')

    cleaned = remove_outliers(df) if rmv_outliers else df.copy()
    use_box_cox = box_cox != 0
    if use_box_cox:
        cleaned = box_cox_transformation(cleaned.copy(), box_cox)
    series = pd.Series(data=cleaned['vendite'].values, index=weekly_index)

    # The same smoother settings are used with and without the STL wrapper.
    smoothing_options = {
        "seasonal_periods": 26,
        "seasonal": 'add',
        "initialization_method": "estimated",
    }
    if decompositon:
        model = STLForecast(
            series, ExponentialSmoothing, period=26, model_kwargs=smoothing_options
        )
    else:
        model = ExponentialSmoothing(series, **smoothing_options)
    fitted = model.fit()

    predict = fitted.forecast(week_to_predict)
    if use_box_cox:
        # Map the forecast back to the original scale.
        predict = box_cox_transformation(predict, box_cox, reverse=True)

    # Continue from the last label of the caller's frame, one week per forecast step.
    week = df.index[-1]
    for step in range(week_to_predict):
        week = add_week(week, 1)
        df.loc[week] = predict.iloc[step]
    return df
def test_no_var_pred(sunspots, not_implemented):
    """get_prediction warns and returns NaN variances when the wrapped prediction has none."""

    class DummyPred:
        """Prediction stand-in that carries only the mean and the row labels."""

        def __init__(self, predicted_mean, row_labels):
            self.predicted_mean = predicted_mean
            self.row_labels = row_labels

            def _unsupported():
                raise NotImplementedError

            if not_implemented:
                self.forecast = property(_unsupported)

    class DummyRes:
        """Result wrapper that forwards to the real result but strips the variance."""

        def __init__(self, fitted):
            self._fitted = fitted

        def forecast(self, *args, **kwargs):
            return self._fitted.forecast(*args, **kwargs)

        def get_prediction(self, *args, **kwargs):
            full_pred = self._fitted.get_prediction(*args, **kwargs)
            return DummyPred(full_pred.predicted_mean, full_pred.row_labels)

    class DummyMod:
        """Model wrapper around a default ARIMA whose fit returns a DummyRes."""

        def __init__(self, y):
            self._arima = ARIMA(y)

        def fit(self, *args, **kwargs):
            return DummyRes(self._arima.fit(*args, **kwargs))

    fitted_stl = STLForecast(sunspots, model=DummyMod, period=11).fit()
    with pytest.warns(UserWarning, match="The variance of"):
        prediction = fitted_stl.get_prediction()
    assert np.all(np.isnan(prediction.var_pred_mean))
def test_exceptions(data):
    """Check the errors raised for models and results that lack required members."""

    # --- stand-in models and results, from least to most complete ---

    class BadModel:
        """Accepts any constructor arguments but defines no ``fit``."""

        def __init__(self, *args, **kwargs):
            pass

    class NoForecast(BadModel):
        """``fit`` returns an object without ``forecast``."""

        def fit(self, *args, **kwargs):
            return BadModel()

    class BadResult:
        """Result with ``forecast`` but no ``summary``."""

        def forecast(self, *args, **kwargs):
            pass

    class FakeModel(BadModel):
        """``fit`` returns a BadResult."""

        def fit(self, *args, **kwargs):
            return BadResult()

    class BadResultSummary(BadResult):
        """Result whose ``summary`` returns a bare ``object``."""

        def summary(self, *args, **kwargs):
            return object()

    class FakeModelSummary(BadModel):
        """``fit`` returns a BadResultSummary."""

        def fit(self, *args, **kwargs):
            return BadResultSummary()

    # --- checks ---

    with pytest.raises(AttributeError, match="model must expose"):
        STLForecast(data, BadModel)

    with pytest.raises(AttributeError, match="The model's result"):
        STLForecast(data, NoForecast).fit()

    with pytest.raises(AttributeError, match="The model result does not"):
        STLForecast(data, FakeModel).fit().summary()

    with pytest.raises(TypeError, match="The model result's summary"):
        STLForecast(data, FakeModelSummary).fit().summary()
def test_equivalence_forecast(data, config, horizon):
    """STLForecast must match a manual STL decomposition plus model forecast.

    The reference forecast is built by fitting ``model`` on the data minus the
    STL seasonal component and adding back the tiled last 12 seasonal values.
    """
    model, kwargs = config

    decomposition = STL(data).fit()
    deseasonalized = data - decomposition.seasonal
    fit_kwarg = {}
    if model is ETSModel:
        fit_kwarg["disp"] = False
    reference = model(deseasonalized, **kwargs).fit(**fit_kwarg)

    stlf = STLForecast(data, model, model_kwargs=kwargs).fit(fit_kwargs=fit_kwarg)

    # Repeat the final 12 seasonal values often enough to cover the horizon.
    last_cycle = np.asarray(decomposition.seasonal)[-12:]
    tiled_seasonal = np.tile(last_cycle, 1 + horizon // 12)
    fcast = reference.forecast(horizon) + tiled_seasonal[:horizon]

    actual = stlf.forecast(horizon)
    assert_allclose(actual, fcast, rtol=1e-4)

    if not hasattr(reference, "get_prediction"):
        return

    nobs = data.shape[0]
    last = nobs + horizon - 1

    # Out-of-sample prediction over the same window must agree with the forecast.
    pred = stlf.get_prediction(nobs, last)
    assert isinstance(pred, PredictionResults)
    assert_allclose(pred.predicted_mean, fcast, rtol=1e-4)

    # Predictions that start in-sample only have to run, with and without dynamic.
    half = nobs // 2
    stlf.get_prediction(half, last)
    for dynamic in (True, half // 2):
        stlf.get_prediction(half, last, dynamic=dynamic)

    if hasattr(data, "index"):
        # The same dynamic start given as string, datetime and index label must agree.
        loc = data.index[half + half // 2]
        by_string = stlf.get_prediction(half, last, dynamic=loc.strftime("%Y-%m-%d"))
        by_datetime = stlf.get_prediction(half, last, dynamic=loc.to_pydatetime())
        by_label = stlf.get_prediction(half, last, dynamic=loc)
        assert_allclose(by_string.predicted_mean, by_datetime.predicted_mean, rtol=1e-4)
        assert_allclose(by_string.predicted_mean, by_label.predicted_mean, rtol=1e-4)
print("The roots of denominators are: ", np.real(root_a[i])) #%% ARIMA(1,1,1) # ARIMA(1,1,1) d = 1 p = 1 q = 1 from statsmodels.tsa.forecasting.stl import STLForecast from statsmodels.tsa.arima.model import ARIMA # getting index freq train.index.freq = train.index.inferred_freq # apply ARIMA model in stlf command stlf = STLForecast(train, ARIMA, model_kwargs=dict(order=(p, d, q))) # fit the model stlf_res = stlf.fit() # make forecasts forecast = stlf_res.forecast(len(test)) # model summary stlf_res.summary() #%% ------------------------------------------- Testing set statistics ------------------------------------------------------- # estimated variance na = stlf_res.model.k_ar nb = stlf_res.model.k_ma errors = np.array(test) - np.array(forecast) SSE_test = np.square(errors).sum() MSE_test = np.square(errors).mean()
# seasonalities and then using a standard time-series model to forecast the # trend and cyclical components. # # Here we use STL to handle the seasonality and then an ARIMA(1,1,0) to # model the deseasonalized data. The seasonal component is forecast from the # find full cycle where # # $$E[S_{T+h}|\mathcal{F}_T]=\hat{S}_{T-k}$$ # # where $k= m - h + m \lfloor \frac{h-1}{m} \rfloor$. The forecast # automatically adds the seasonal component forecast to the ARIMA forecast. from statsmodels.tsa.arima.model import ARIMA from statsmodels.tsa.forecasting.stl import STLForecast elec_equip.index.freq = elec_equip.index.inferred_freq stlf = STLForecast(elec_equip, ARIMA, model_kwargs=dict(order=(1, 1, 0), trend="t")) stlf_res = stlf.fit() forecast = stlf_res.forecast(24) plt.plot(elec_equip) plt.plot(forecast) plt.show() # ``summary`` contains information about both the time-series model and # the STL decomposition. print(stlf_res.summary())