def ets_predict(i):
    """Predict ``forecast_horizon`` steps starting at test position ``i``.

    Reads ``online``, ``y_train``, ``y_test`` and ``forecast_horizon`` from
    the surrounding scope.

    Returns
    -------
    tuple
        The ``"mean"``, ``"pi_upper"`` and ``"pi_lower"`` columns of the
        prediction summary frame, in that order.
    """
    if online:
        # Receding window: append the test observations seen so far to the
        # training series and refit on the extended series.
        extended_train = pd.concat([y_train, y_test.iloc[0:i]])
        model = ETSModel(
            extended_train,
            error="add",
            trend="add",
            damped_trend=True,
            seasonal="add",
            dates=extended_train.index,
        )
        fitted = model.fit()

    # NOTE(review): ``fitted`` is only bound in the ``online`` branch above,
    # so this raises UnboundLocalError when ``online`` is falsy.
    #
    # get_prediction covers in-sample and out-of-sample predictions as well as
    # prediction intervals. With method='simulated', simulate_repetitions=100
    # the prediction intervals can be simulated instead.
    prediction = fitted.get_prediction(
        start=y_test.index[i],
        end=y_test.index[i + forecast_horizon - 1],
    )
    pred = prediction.summary_frame()

    return pred["mean"], pred["pi_upper"], pred["pi_lower"]
def statespace_comparison(austourists):
    """Fit an ETS model and a statespace model pinned to the ETS estimates.

    The ETS(A, Ad, A) model with period 4 is fitted first. Its initial states
    initialise ``statespace.ExponentialSmoothing`` and its smoothing/damping
    parameters are fixed on that model before it is fitted.

    Returns
    -------
    tuple
        ``(ets_results, statespace_results)``.
    """
    ets_results = ETSModel(
        austourists,
        seasonal_periods=4,
        error="add",
        trend="add",
        seasonal="add",
        damped_trend=True,
    ).fit(disp=False)

    # Initial states taken over from the ETS fit.
    known_initial_state = {
        "initial_level": ets_results.initial_level,
        "initial_trend": ets_results.initial_trend,
        "initial_seasonal": ets_results.initial_seasonal,
    }
    statespace_model = statespace.ExponentialSmoothing(
        austourists,
        trend=True,
        damped_trend=True,
        seasonal=4,
        initialization_method="known",
        **known_initial_state,
    )

    # Parameters held fixed at the ETS estimates while fitting.
    fixed_names = (
        "smoothing_level",
        "smoothing_trend",
        "smoothing_seasonal",
        "damping_trend",
    )
    fixed_params = {name: getattr(ets_results, name) for name in fixed_names}

    with statespace_model.fix_params(fixed_params):
        statespace_results = statespace_model.fit()

    return ets_results, statespace_results
def test_one_step_ahead(setup_model):
    """Check that the one-step-ahead result does not depend on the horizon.

    The first forecast value must be the same whether one or two steps are
    requested, both for ``forecast`` and for the first row of the
    ``get_prediction`` summary frame.
    """
    model, params, results_R = setup_model
    model2 = ETSModel(
        pd.Series(model.endog),
        seasonal_periods=model.seasonal_periods,
        error=model.error,
        trend=model.trend,
        seasonal=model.seasonal,
        damped_trend=model.damped_trend,
    )
    res = model2.smooth(params)

    fcast1 = res.forecast(steps=1)
    fcast2 = res.forecast(steps=2)
    assert_allclose(fcast1.iloc[0], fcast2.iloc[0])

    pred1 = res.get_prediction(
        start=model2.nobs, end=model2.nobs, simulate_repetitions=2
    )
    pred2 = res.get_prediction(
        start=model2.nobs, end=model2.nobs + 1, simulate_repetitions=2
    )
    df1 = pred1.summary_frame(alpha=0.05)
    # Compare against the two-step prediction; building both frames from
    # ``pred1`` would make the assertion below compare a value with itself.
    df2 = pred2.summary_frame(alpha=0.05)
    # Row 0 / column 0: first predicted step, first summary column
    # (presumably the mean -- confirm against summary_frame's column order).
    assert_allclose(df1.iloc[0, 0], df2.iloc[0, 0])
def test_prediction_results_slow_AAN(oildata):
    """Compare simulated and exact prediction results for an additive model.

    Uses one million simulation repetitions, so it is slow. The simulated
    mean is checked against the numerical mean, and the simulated prediction
    interval bounds against the exact ones.
    """
    # slow test with a high number of simulation repetitions for comparison
    # Note: runs successfully with the specified tolerance
    fit = ETSModel(oildata, error="add", trend="add").fit(disp=False)

    window = {"start": 40, "end": 55}
    summary_exact = fit.get_prediction(**window).summary_frame()
    pred_sim = fit.get_prediction(
        simulate_repetitions=int(1e6),
        random_state=11,
        method="simulated",
        **window,
    )
    summary_sim = pred_sim.summary_frame()

    # check if mean converges to expected mean
    assert_allclose(
        summary_sim["mean"].values,
        summary_sim["mean_numerical"].values,
        rtol=1e-3,
        atol=1e-3,
    )

    # NOTE(review): the plotting below switches to the interactive TkAgg
    # backend and calls plt.show(); confirm this is intended inside a test.
    import matplotlib.pyplot as plt

    plt.switch_backend("TkAgg")
    simulated_paths = pred_sim._results.simulation_results
    for path_number in range(1000):
        plt.plot(
            simulated_paths.iloc[:, path_number],
            color="grey",
            alpha=0.1,
        )
    overlays = (
        (oildata[40:], "-", "data"),
        (summary_exact["mean"], "--", "mean"),
        (summary_sim["pi_lower"], ":", "sim lower"),
        (summary_exact["pi_lower"], ".-", "exact lower"),
        (summary_sim["pi_upper"], ":", "sim upper"),
        (summary_exact["pi_upper"], ".-", "exact upper"),
    )
    for values, line_format, label in overlays:
        plt.plot(values, line_format, label=label)
    plt.show()

    # check if prediction intervals are equal
    for bound in ("pi_lower", "pi_upper"):
        assert_allclose(
            summary_sim[bound].values,
            summary_exact[bound].values,
            rtol=1e-4,
            atol=1e-4,
        )
def test_seasonal_periods(austourists):
    """Check seasonal period auto-deduction and validation in ETSModel."""
    # test auto-deduction of period
    model = ETSModel(austourists, error="add", trend="add", seasonal="add")
    assert model.seasonal_periods == 4

    # An invalid seasonal period must raise. A bare ``try/except: pass`` would
    # also pass when no error is raised, so assert the exception explicitly.
    with pytest.raises(ValueError):
        ETSModel(austourists, seasonal="add", seasonal_periods=0)
def test_bounded_fit(oildata):
    """Pin ``smoothing_trend`` to 0.99 in three ways and compare the fits.

    The three ways are degenerate bounds, the ``fix_params`` context manager
    and ``fit_constrained``; all must give the same parameters.
    """
    model_kwargs = {"error": "add", "trend": "add", "damped_trend": True}

    # 1) bounds whose lower and upper limit coincide
    beta = [0.99, 0.99]
    bounded_model = ETSModel(
        oildata, bounds={"smoothing_trend": beta}, **model_kwargs
    )
    fit1 = bounded_model.fit(disp=False)
    assert fit1.smoothing_trend == 0.99

    # 2) same using the fix_params semantic
    model2 = ETSModel(oildata, **model_kwargs)
    with model2.fix_params({"smoothing_trend": 0.99}):
        fit2 = model2.fit(disp=False)
    assert fit2.smoothing_trend == 0.99
    assert_allclose(fit1.params, fit2.params)
    # check if summary runs without failing
    fit2.summary()

    # 3) using fit_constrained
    fit3 = model2.fit_constrained({"smoothing_trend": 0.99})
    assert fit3.smoothing_trend == 0.99
    assert_allclose(fit1.params, fit3.params)
    fit3.summary()
def test_aicc_0_dof():
    """Check that AICc is non-finite and positive for this 5-point series."""
    # GH8172
    observations = [109.0, 101.0, 104.0, 90.0, 105.0]
    model = ETSModel(
        endog=observations,
        initialization_method='known',
        initial_level=100.0,
        initial_trend=0.0,
        error='add',
        trend='add',
        damped_trend=True,
    )
    fitted = model.fit()
    aicc = fitted.aicc

    assert not np.isfinite(aicc)
    assert aicc > 0
def test_initialization_known(austourists):
    """Check that known initial states show up in the internal parameters.

    Positions 4 and 5 of the internal parameter vector must hold the given
    initial level and trend, and position 6 must be zero.
    """
    initial_level = 36.46466837
    initial_trend = 34.72584983

    model = ETSModel(
        austourists,
        error="add",
        trend="add",
        damped_trend=True,
        initialization_method="known",
        initial_level=initial_level,
        initial_trend=initial_trend,
    )
    internal_params = model._internal_params(model._start_params)

    assert initial_level == internal_params[4]
    assert initial_trend == internal_params[5]
    assert internal_params[6] == 0
def oildata_model(oildata):
    """Build an ETSModel on ``oildata`` with additive error and damped additive trend."""
    model_options = {
        "error": "add",
        "trend": "add",
        "damped_trend": True,
    }
    return ETSModel(oildata, **model_options)
def test_convergence_simple():
    """Compare ETSModel with holtwinters.ExponentialSmoothing on simulated data.

    The smoothing level and the fitted values (after the first ten) must
    agree within ``rtol=atol=1e-4``.
    """
    # issue 6883
    rng = np.random.RandomState(0)
    shocks = rng.standard_normal(12000)
    series = shocks.copy()
    # Recursive construction: each value depends on the previous value and
    # the previous and current shocks.
    for t in range(1, shocks.shape[0]):
        series[t] = series[t - 1] - 0.2 * shocks[t - 1] + shocks[t]
    # Discard the first 200 simulated values.
    series = series[200:]

    holtwinters_model = holtwinters.ExponentialSmoothing(
        series, initialization_method="estimated"
    )
    res = holtwinters_model.fit()
    ets_res = ETSModel(series).fit()

    tolerance = {"rtol": 1e-4, "atol": 1e-4}

    # the smoothing level should be very similar, the initial state might be
    # different as it doesn't influence the final result too much
    assert_allclose(
        res.params["smoothing_level"],
        ets_res.smoothing_level,
        **tolerance,
    )

    # the first few values are influenced by differences in initial state, so
    # we don't test them here
    assert_allclose(
        res.fittedvalues[10:], ets_res.fittedvalues[10:], **tolerance
    )
def test_estimated_initialization_short_data(oildata, trend, seasonal, nobs):
    """Check that fitting the first ``nobs`` observations yields no NaN parameters."""
    # GH 7319
    res = ETSModel(
        oildata[:nobs],
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=4,
        initialization_method='estimated',
    ).fit()
    # Use ``not`` rather than ``~``: bitwise inversion of a builtin ``bool``
    # gives -1 or -2, both truthy, so the assertion could never fail if
    # ``np.any`` hands back a plain Python bool.
    assert not np.any(np.isnan(res.params))
def austourists_model(austourists):
    """Build an additive, damped-trend ETSModel on ``austourists`` with period 4."""
    model_options = {
        "seasonal_periods": 4,
        "error": "add",
        "trend": "add",
        "seasonal": "add",
        "damped_trend": True,
    }
    return ETSModel(austourists, **model_options)
def test_seasonal_order(reset_randomstate, method):
    """Check that a purely periodic series is recovered in the right order.

    The series repeats 0..11 one hundred times. The fitted initial seasonal
    states plus the initial level must reproduce that pattern, and the mean
    absolute error must be below 1e-6.
    """
    seasonal = np.arange(12.0)
    time_series = np.tile(seasonal, 100)

    model = ETSModel(
        time_series,
        seasonal="add",
        seasonal_periods=12,
        initialization_method=method,
    )
    res = model.fit()

    recovered_pattern = res.initial_seasonal + res.initial_level
    assert_allclose(recovered_pattern, seasonal, atol=1e-4, rtol=1e-4)
    assert res.mae < 1e-6
def test_convergence_simple():
    """Compare ETSModel with ExponentialSmoothing on simulated data.

    The smoothing level and the fitted values (after the first ten) must
    agree to 3 decimals.
    """
    # issue 6883
    rng = np.random.RandomState(0)
    shocks = rng.standard_normal(12000)
    series = shocks.copy()
    # Recursive construction: each value depends on the previous value and
    # the previous and current shocks.
    for t in range(1, shocks.shape[0]):
        series[t] = series[t - 1] - 0.2 * shocks[t - 1] + shocks[t]
    # Discard the first 200 simulated values.
    series = series[200:]

    res = ExponentialSmoothing(series).fit()
    ets_res = ETSModel(series).fit()

    decimals = 3

    # the smoothing level should be very similar, the initial state might be
    # different as it doesn't influence the final result too much
    assert_almost_equal(
        res.params['smoothing_level'], ets_res.smoothing_level, decimals
    )

    # the first few values are influenced by differences in initial state, so
    # we don't test them here
    assert_almost_equal(
        res.fittedvalues[10:], ets_res.fittedvalues[10:], decimals
    )
def setup_model(
    request,
    austourists,
    oildata,
    ets_austourists_fit_results_R,
    ets_oildata_fit_results_R,
):
    """Build the ETSModel and R reference values for one parameter set.

    ``request.param`` holds ``(error, trend, seasonal, damped, dataset)``.
    The run is skipped when the R results contain no entry for the model.

    Returns
    -------
    tuple
        ``(model, params, results_R)``.
    """
    spec = request.param
    error, trend, seasonal, damped = spec[0:4]
    dataset = spec[4]

    # Pick data, seasonal period and reference results for the dataset.
    if dataset == "austourists":
        data = austourists
        seasonal_periods = 4
        reference = ets_austourists_fit_results_R
    else:
        data = oildata
        seasonal_periods = None
        reference = ets_oildata_fit_results_R
    results = reference[damped]

    name = short_model_name(error, trend, seasonal)
    if name not in results:
        pytest.skip(f"model {name} not implemented or not converging in R")

    results_R = results[name]
    params = get_params_from_R(results_R)

    model = ETSModel(
        data,
        seasonal_periods=seasonal_periods,
        error=error,
        trend=trend,
        seasonal=seasonal,
        damped_trend=damped,
    )
    return model, params, results_R
def test_initialization_heuristic(oildata):
    """Compare fitted values under estimated and heuristic initialization."""
    model_options = {"error": "add", "trend": "add", "damped_trend": True}

    model_estimated = ETSModel(
        oildata, initialization_method="estimated", **model_options
    )
    model_heuristic = ETSModel(
        oildata, initialization_method="heuristic", **model_options
    )

    fit_estimated = model_estimated.fit(disp=False)
    fit_heuristic = model_heuristic.fit(disp=False)

    yhat_estimated = fit_estimated.fittedvalues.values
    yhat_heuristic = fit_heuristic.fittedvalues.values

    # this test is mostly just to see if it works, so we only test whether the
    # result is not totally off
    assert_allclose(yhat_estimated[10:], yhat_heuristic[10:], rtol=0.5)
def _fourier_terms(index):
    """Return 10 yearly sine/cosine pairs evaluated on ``index.dayofyear``.

    Columns are interleaved (sin, cos, sin_2, cos_2, ...) so regression
    coefficients keep the same positional order.
    """
    day_of_year = np.asarray(index.dayofyear, dtype=float)
    terms = pd.DataFrame(index=index)
    for order in range(1, 11):
        suffix = '' if order == 1 else '_' + str(order)
        angle = 2 * order * np.pi * day_of_year / 365.25
        terms['yearly_sin365' + suffix] = np.sin(angle)
        terms['yearly_cos365' + suffix] = np.cos(angle)
    return terms


def _month_dummies(index):
    """Return one 0/1 column per calendar month for ``index``."""
    month_names = ['January', 'February ', 'March', 'April ', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November',
                   'December']
    month_of_row = np.asarray(index.month)
    dummies = pd.DataFrame(index=index)
    # January is month 1
    for month_number, month_name in enumerate(month_names, start=1):
        dummies[month_name] = (month_of_row == month_number).astype(float)
    return dummies


def _replace_non_positive(series):
    """Return a float copy of ``series`` with values <= 0 replaced.

    Each such value becomes the mean of its available neighbours (both in the
    interior, a single one at either end of the series).
    """
    cleaned = series.astype(float)
    n_obs = len(cleaned)
    for pos in range(n_obs):
        if cleaned.iloc[pos] <= 0:
            neighbours = []
            if pos > 0:
                neighbours.append(cleaned.iloc[pos - 1])
            if pos + 1 < n_obs:
                neighbours.append(cleaned.iloc[pos + 1])
            if neighbours:
                cleaned.iloc[pos] = sum(neighbours) / len(neighbours)
    return cleaned


def _shifted_event_dummies(exogen, n_days, label, look_ahead):
    """Return 0/1 columns marking the days before or after each event.

    With ``look_ahead=True`` row ``i`` of the ``d``-day column is 1 when the
    event occurs at row ``i + d`` (days before the event); otherwise it is 1
    when the event occurred at row ``i - d`` (days after the event). Rows
    without a counterpart inside the data are 0.
    """
    n_rows = len(exogen.index)
    columns = {}
    for position, event in enumerate(exogen.columns):
        is_event = (exogen.iloc[:, position].to_numpy() == 1).astype(float)
        for d in range(1, n_days + 1):
            shifted = np.zeros(n_rows)
            if d < n_rows:
                if look_ahead:
                    shifted[:n_rows - d] = is_event[d:]
                else:
                    shifted[d:] = is_event[:n_rows - d]
            columns[str(d) + label + str(event)] = shifted
    return pd.DataFrame(columns, index=exogen.index.rename('date'))


def _event_design_matrix(exogen, yearly, n_obs):
    """Return the first ``n_obs`` rows of the regressors, joined on the index."""
    events = exogen[0:n_obs]
    if yearly is None:
        return events
    return pd.merge(events, yearly[0:n_obs], left_index=True, right_index=True)


def _select_event_effects(errors, exogen, yearly, minpvalue, mineffect):
    """Drop weak event columns by OLS and refit on the remaining ones.

    An event column is dropped when its p-value is >= ``minpvalue`` or the
    absolute value of its coefficient is <= ``mineffect``. Only event columns
    are screened; the yearly columns are always kept.

    Returns the reduced ``exogen`` plus the event and yearly coefficients of
    the second regression.
    """
    n_obs = len(errors)
    first_pass = sm.OLS(errors, _event_design_matrix(exogen, yearly, n_obs)).fit()
    # Plain arrays: the screening below is positional, not label based.
    pvalues = np.asarray(first_pass.pvalues)
    effects = np.asarray(first_pass.params)
    weak = [
        i for i in range(exogen.shape[1])
        if pvalues[i] >= minpvalue or abs(effects[i]) <= mineffect
    ]
    exogen = exogen.drop(columns=exogen.columns[weak])

    # rerunning the OLS regression to obtain the exogen initial parameters
    second_pass = sm.OLS(errors, _event_design_matrix(exogen, yearly, n_obs)).fit()
    coefficients = np.asarray(second_pass.params)
    n_events = exogen.shape[1]
    return exogen, coefficients[:n_events], coefficients[n_events:]


def _assemble_starting_parameters(smoothing, yearly_seasonality, alpha, beta,
                                  gamma, omega, epsilon, level_initial,
                                  slope_initial, weekly_initial,
                                  exogen_initial_parameters, yearly_initial):
    """Flatten all starting values into one 1-D array."""
    has_yearly = yearly_seasonality == "fourier" or yearly_seasonality == "dummies"
    states = (level_initial, slope_initial, weekly_initial, exogen_initial_parameters)
    if has_yearly:
        states = states + (yearly_initial,)

    if smoothing:
        parts = states
    elif has_yearly:
        # NOTE(review): omega precedes gamma here but follows it in the branch
        # without yearly seasonality, and epsilon is only appended here. Kept
        # as found -- confirm against the code that unpacks this array.
        parts = (alpha, beta, omega, gamma) + states + (epsilon,)
    else:
        parts = (alpha, beta, gamma, omega) + states
    return np.concatenate(parts, axis=None)


def Initial_Parameter_calculater(series_training, exogen, alpha, beta, gamma, omega, epsilon, smoothing,
                                 yearly_seasonality, initialisation, days_before_events, days_after_events,
                                 minpvalue, mineffect):
    """Calculate starting parameters for the optimization of the ETS model.

    Two initialisation modes are supported.

    ``'Standard'`` follows Forecasting with Exponential Smoothing
    (Hyndman et al. 2008), p. 23-24:

    * Weekly seasonal states: a 7-lag moving average followed by a 2-lag
      moving average is used to detrend the first 371 observations. The
      detrended values are averaged per weekday, put in the order in which
      the weekdays occur from the first observation on, and normalised to
      sum to 7.
    * Level and slope: a linear regression on a time trend is fitted to the
      first ten seasonally adjusted values. The intercept is the initial
      level. The trend coefficient divided by the mean of the first ten
      values is the initial slope.
    * Exogenous effects: the series is regressed on the exogenous variables
      and the coefficients are divided by the mean of the series.
    * Yearly seasonality (optional): for ``"fourier"`` the first 365
      observations are regressed on 20 Fourier terms plus 7 weekday dummies
      and only the Fourier coefficients are kept, divided by the series mean.
      For ``"dummies"`` the monthly means relative to the series mean are
      used.

    ``'ETS_Optimizing_days_around_events'`` fits a statsmodels ``ETSModel``
    (multiplicative error, damped additive trend, multiplicative seasonality
    with period 7) and takes the smoothing parameters and initial states from
    that fit, overriding the ``alpha``, ``beta``, ``gamma`` and ``omega``
    arguments. Values <= 0 are replaced on an internal copy before fitting;
    ``series_training`` itself is left untouched. Dummy columns for the days
    before and after each event are added to ``exogen``, the relative model
    errors are regressed on them (plus the yearly terms, if any) and event
    columns that are insignificant or too small are dropped.

    Parameters
    ----------
    series_training : pandas.Series
        The training time series, indexed by date.
    exogen : pandas.DataFrame
        Exogenous (event) variables, one column per variable, indexed by time.
    alpha, beta, gamma, omega, epsilon : float
        Smoothing parameters placed in front of (``epsilon``: behind) the
        initial states when ``smoothing`` is falsy.
    smoothing : bool
        If truthy, the smoothing parameters are left out of the result.
    yearly_seasonality : str or None
        ``"fourier"``, ``"dummies"`` or anything else for no yearly terms.
    initialisation : str
        ``'Standard'`` or ``'ETS_Optimizing_days_around_events'``.
    days_before_events, days_after_events : int
        Number of day-offset dummies per event (ETS mode only).
    minpvalue, mineffect : float
        Thresholds for dropping event columns (ETS mode only).

    Returns
    -------
    tuple
        ``(Starting_Parameters, exogen)``: the flattened array of starting
        values and the exogenous frame that matches them.

    Raises
    ------
    ValueError
        If ``initialisation`` is not one of the two supported modes.
    """
    if initialisation not in ('Standard', 'ETS_Optimizing_days_around_events'):
        raise ValueError(
            "initialisation must be 'Standard' or "
            "'ETS_Optimizing_days_around_events', got {!r}".format(initialisation)
        )

    yearly_initial = None

    if initialisation == 'Standard':
        # --- Initial weekly seasonal component ---
        # Moving average of 7 lags, then of 2 lags on the result.
        moving_average = series_training[:371].rolling(window=7).mean()
        moving_average = moving_average.rolling(window=2).mean()

        # Detrending for a multiplicative model. The first 7 values are
        # skipped because they are needed to start the moving average.
        detrended_series = series_training[7:371] / moving_average[7:]
        detrended_series.index = pd.to_datetime(detrended_series.index, format='%Y-%m-%d')

        # Weekday of the first observation, needed to order the seasonal
        # states.
        first_weekday = pd.to_datetime(series_training.index, format='%Y-%m-%d')[0].weekday()
        weekday_means = detrended_series.groupby(detrended_series.index.dayofweek).mean()

        # Observation i falls on weekday (first_weekday + i) % 7, which is how
        # the states are consumed below. abs(first_weekday - i) would repeat
        # some weekdays and skip others unless the series starts on a Monday.
        weekday_order = (first_weekday + np.arange(7)) % 7
        weekly_initial = weekday_means.loc[weekday_order].to_numpy(dtype=float)

        # Normalise the seasonal indices so they add up to m = 7.
        weekly_initial = weekly_initial * (7 / sum(weekly_initial))

        # --- Initial level and slope components ---
        # Seasonally adjust the first 10 values and regress them on time.
        seasonal_first_10 = np.concatenate((weekly_initial, weekly_initial[0:3]))
        adjusted_first_10 = series_training.iloc[:10].to_numpy(dtype=float) / seasonal_first_10
        time_index = np.arange(10).reshape(-1, 1)
        trend_fit = LinearRegression().fit(time_index, adjusted_first_10.reshape(-1, 1))

        level_initial = trend_fit.intercept_[0]
        # Divided by the mean because the slope is used as a relative effect.
        slope_initial = trend_fit.coef_[0] / mean(series_training[0:10])

        # --- Initial values for the regressors ---
        event_fit = LinearRegression().fit(exogen[0:len(series_training)], series_training)
        exogen_initial_parameters = event_fit.coef_[0:exogen.shape[1]] / mean(series_training)

        # --- Initial yearly seasonality effects ---
        if yearly_seasonality == "fourier":
            yearly = _fourier_terms(pd.PeriodIndex(series_training.index, freq='D'))

            # Weekday dummies act as controls for the weekly seasonality.
            weekday_names = ['Monday', 'Tuesday', 'Wensday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
            weekday_of_row = np.asarray(yearly.index.dayofweek)
            for weekday_number, weekday_name in enumerate(weekday_names):
                yearly[weekday_name] = (weekday_of_row == weekday_number).astype(float)

            yearly_fit = LinearRegression().fit(yearly[0:365], series_training[0:365])
            # Relative effects: divide by the series mean, then drop the 7
            # weekday coefficients so only the Fourier weights remain.
            yearly_initial = (yearly_fit.coef_ / mean(series_training))[:-7]

        if yearly_seasonality == 'dummies':
            yearly_initial = series_training.groupby(series_training.index.month).mean() / mean(series_training)

    else:
        # ETS model with damped additive trend, multiplicative error and
        # multiplicative 7-day seasonality.
        from statsmodels.tsa.exponential_smoothing.ets import ETSModel

        # Values <= 0 do not work with multiplicative components; replace
        # them on a copy so the caller's series is not modified.
        series_clean = _replace_non_positive(series_training)

        fit = ETSModel(endog=series_clean, error="mul", trend="add", damped_trend=True,
                       seasonal="mul", seasonal_periods=7).fit()

        # Positional layout used here: four smoothing parameters, level,
        # slope, then the seven seasonal states.
        ets_params = np.asarray(fit.params, dtype=float)
        alpha, beta, gamma, omega = ets_params[0:4]
        level_initial, slope_initial = ets_params[4:6]
        weekly_initial = ets_params[6:13]

        # Relative model errors, computed on the series the model was fitted
        # to.
        errors = (series_clean - fit.fittedvalues) / fit.fittedvalues

        # Dummies for the days before and after each event, merged with the
        # exogenous data.
        df_before = _shifted_event_dummies(exogen, days_before_events, '_days_before_', look_ahead=True)
        df_after = _shifted_event_dummies(exogen, days_after_events, '_days_after_', look_ahead=False)
        exogen = pd.merge(exogen, df_before, left_index=True, right_index=True)
        exogen = pd.merge(exogen, df_after, left_index=True, right_index=True)

        # Yearly seasonality, if requested, enters the OLS as extra columns.
        if yearly_seasonality == "fourier":
            yearly = _fourier_terms(pd.to_datetime(series_training.index))
        elif yearly_seasonality == 'dummies':
            yearly = _month_dummies(pd.to_datetime(series_training.index))
        else:
            yearly = None

        exogen, exogen_initial_parameters, yearly_coefficients = _select_event_effects(
            errors, exogen, yearly, minpvalue, mineffect)
        if yearly is not None:
            yearly_initial = yearly_coefficients

    Starting_Parameters = _assemble_starting_parameters(
        smoothing, yearly_seasonality, alpha, beta, gamma, omega, epsilon,
        level_initial, slope_initial, weekly_initial,
        exogen_initial_parameters, yearly_initial)

    return Starting_Parameters, exogen
def exp_smoothing(y_train, y_test, forecast_horizon=1, limit_steps=False, pi_alpha=1.96, online=True):
    """
    Train an exponential smoothing timeseries model (ETS)

    An additive ETS model with damped trend and additive seasonality is used
    to forecast ``forecast_horizon`` steps from every admissible position of
    ``y_test``. The forecasts are computed in parallel.

    Parameters
    ----------
    y_train : pandas.Series or pandas.DataFrame
        The train values of the target variable
    y_test : pandas.Series or pandas.DataFrame
        The observed values of the target variable in the test period. Its
        index defines the forecast windows, and with ``online=True`` its
        values are appended to the train series.
    forecast_horizon : int, default = 1
        Number of future steps to be forecasted
    limit_steps : int, default = False
        limits the number of simulation/predictions into the future.
        If False, steps is equal to length of validation set
    pi_alpha : float, default = 1.96
        Measure to adjust confidence interval, default is set to 1.96, which is a 95% PI.
        Currently not used: the interval comes from ``summary_frame()`` with
        its default settings.
    online : bool, default = True
        if True the new observations are used to fit the model again.
        If False a single model fitted on ``y_train`` is used throughout.

    Returns
    -------
    ndarray
        Expected forecast values, one row per forecast origin
        (Shape: (number of forecast origins, forecast_horizon))
    ndarray
        The upper interval for the given forecasts, same shape.
    ndarray
        The lower interval for the forecasts, same shape.

    Raises
    ------
    ValueError
        If no forecast window of length ``forecast_horizon`` fits into
        ``y_test``.
    """
    print('Train an exponential smoothing timeseries model (ETS)...')
    num_cores = max(multiprocessing.cpu_count() - 2, 1)

    def fit_ets(series):
        # One model configuration for both the static and the online fit.
        model = ETSModel(series, error="add", trend="add", damped_trend=True, seasonal="add", dates=series.index)
        return model.fit()

    # With online=True every window is refitted, so a fit on y_train alone
    # would never be used.
    static_fit = None if online else fit_ets(y_train)

    def ets_predict(i):
        if online:
            # extend the train-series with observed values as we move forward in the prediction horizon
            # to achieve a receding window prediction
            y_train_i = pd.concat([y_train, y_test.iloc[0:i]])
            window_fit = fit_ets(y_train_i)
        else:
            # Separate names for the local and the enclosing fit: assigning
            # to one shared name makes it local to this function and leaves
            # it unbound whenever online is False.
            window_fit = static_fit
        # get_prediction gives in-sample and out-of-sample predictions as
        # well as prediction intervals. With method='simulated',
        # simulate_repetitions=100 the PIs can be simulated instead.
        pred = window_fit.get_prediction(
            start=y_test.index[i], end=y_test.index[i + forecast_horizon - 1]
        ).summary_frame()
        return pred["mean"], pred["pi_upper"], pred["pi_lower"]

    test_period = range(len(y_test))
    if limit_steps:
        test_period = range(limit_steps)

    # Only origins whose full horizon lies inside y_test can be evaluated.
    origins = [i for i in test_period if i + forecast_horizon <= len(y_test)]
    if not origins:
        raise ValueError(
            'No forecast window of length forecast_horizon fits into y_test.'
        )

    expected_value, fc_u, fc_l = \
        zip(*Parallel(n_jobs=min(num_cores, len(origins)), mmap_mode='c',
                      temp_folder='/tmp')(delayed(ets_predict)(i) for i in origins))

    print('Training and validating ETS model completed.')
    return np.asarray(expected_value), np.asarray(fc_u), np.asarray(fc_l)
525.9509, 549.8338, 542.3405, ] oil = pd.Series(oildata, index=pd.date_range("1965", "2013", freq="AS")) oil.plot() plt.ylabel("Annual oil production in Saudi Arabia (Mt)") # The plot above shows annual oil production in Saudi Arabia in million # tonnes. The data are taken from the R package `fpp2` (companion package to # prior version [1]). # Below you can see how to fit a simple exponential smoothing model using # statsmodels's ETS implementation to this data. Additionally, the fit using # `forecast` in R is shown as comparison. model = ETSModel(oil) fit = model.fit(maxiter=10000) oil.plot(label="data") fit.fittedvalues.plot(label="statsmodels fit") plt.ylabel("Annual oil production in Saudi Arabia (Mt)") # obtained from R params_R = [ 0.99989969, 0.11888177503085334, 0.80000197, 36.46466837, 34.72584983 ] yhat = model.smooth(params_R).fittedvalues yhat.plot(label="R fit", linestyle="--") plt.legend() # By default the initial states are considered to be fitting parameters