def ets_predict(i):
    """Predict ``forecast_horizon`` steps starting at position ``i`` of the test series.

    The function reads ``online``, ``y_train``, ``y_test``, ``forecast_horizon``
    and ``fit`` from the surrounding scope.

    Parameters
    ----------
    i : int
        Position in ``y_test`` of the first step to predict.

    Returns
    -------
    tuple
        ``(mean, pi_upper, pi_lower)`` columns of the prediction summary frame.
    """
    if online:
        # extend the train-series with observed values as we move forward in the prediction horizon
        # to achieve a receding window prediction
        y_train_i = pd.concat([y_train, y_test.iloc[0:i]])
        # Bind the refit to a new local name: assigning to ``fit`` here would
        # make ``fit`` local to this function and break the ``online=False``
        # path with an UnboundLocalError.
        current_fit = ETSModel(y_train_i,
                               error="add",
                               trend="add",
                               damped_trend=True,
                               seasonal="add",
                               dates=y_train_i.index).fit()
    else:
        # reuse the model that was fitted once in the surrounding scope
        current_fit = fit
    # There are several different ETS methods available:
    #  - forecast: makes out of sample predictions
    #  - predict: in sample and out of sample predictions
    #  - simulate: runs simulations of the statespace model
    #  - get_prediction: in sample and out of sample predictions, as well as prediction intervals
    pred = current_fit.get_prediction(
        start=y_test.index[i], end=y_test.index[i + forecast_horizon - 1]
    ).summary_frame(
    )  # with: method = 'simulated', simulate_repetitions=100 we can simulate the PI's
    ## --plotting current prediction--
    # plt.rcParams['figure.figsize'] = (12, 8)
    # pred["mean"].plot(label='mean prediction')
    # pred["pi_lower"].plot(linestyle='--', color='tab:blue', label='95% interval')
    # pred["pi_upper"].plot(linestyle='--', color='tab:blue', label='_')
    # y_test[i:i-1 + forecast_horizon].plot(label='true_values')
    # plt.legend()
    # plt.show()
    return pred["mean"], pred["pi_upper"], pred["pi_lower"]
def statespace_comparison(austourists):
    """Fit an ETS model and a statespace model pinned to the ETS estimates.

    The ETS model (additive error/trend/seasonal, damped, period 4) is fitted
    first. Its initial states and smoothing parameters are then handed to
    ``statespace.ExponentialSmoothing`` as known initial values and fixed
    parameters, so both results describe the same model.

    Returns
    -------
    tuple
        ``(ets_results, statespace_results)``.
    """
    ets_results = ETSModel(
        austourists,
        seasonal_periods=4,
        error="add",
        trend="add",
        seasonal="add",
        damped_trend=True,
    ).fit(disp=False)

    # initial states estimated by the ETS fit, passed on as "known" values
    known_initial_state = {
        f"initial_{component}": getattr(ets_results, f"initial_{component}")
        for component in ("level", "trend", "seasonal")
    }
    statespace_model = statespace.ExponentialSmoothing(
        austourists,
        trend=True,
        damped_trend=True,
        seasonal=4,
        initialization_method="known",
        **known_initial_state,
    )

    # smoothing/damping parameters are held at the ETS estimates during fit
    fixed_parameters = {
        name: getattr(ets_results, name)
        for name in (
            "smoothing_level",
            "smoothing_trend",
            "smoothing_seasonal",
            "damping_trend",
        )
    }
    with statespace_model.fix_params(fixed_parameters):
        statespace_results = statespace_model.fit()

    return ets_results, statespace_results
def test_one_step_ahead(setup_model):
    """Check that the first forecast step does not depend on the horizon length.

    Both ``forecast`` and ``get_prediction`` are called with a one-step and a
    two-step horizon; the first predicted value must agree in each pair.
    """
    model, params, results_R = setup_model
    model2 = ETSModel(
        pd.Series(model.endog),
        seasonal_periods=model.seasonal_periods,
        error=model.error,
        trend=model.trend,
        seasonal=model.seasonal,
        damped_trend=model.damped_trend,
    )
    res = model2.smooth(params)

    fcast1 = res.forecast(steps=1)
    fcast2 = res.forecast(steps=2)
    assert_allclose(fcast1.iloc[0], fcast2.iloc[0])

    pred1 = res.get_prediction(start=model2.nobs,
                               end=model2.nobs,
                               simulate_repetitions=2)
    pred2 = res.get_prediction(start=model2.nobs,
                               end=model2.nobs + 1,
                               simulate_repetitions=2)
    df1 = pred1.summary_frame(alpha=0.05)
    # Use the two-step prediction here; building df2 from pred1 compared the
    # one-step frame with itself and made the assertion below vacuous.
    df2 = pred2.summary_frame(alpha=0.05)
    assert_allclose(df1.iloc[0, 0], df2.iloc[0, 0])
def test_prediction_results_slow_AAN(oildata):
    """Compare simulated and exact prediction intervals for an additive model.

    Slow test: it uses a high number of simulation repetitions so the
    simulated results can be compared against the exact ones. It runs
    successfully with the tolerances specified below. It also opens a
    diagnostic plot of the simulated paths and the interval bounds.
    """
    results = ETSModel(oildata, error="add", trend="add").fit(disp=False)
    window = {"start": 40, "end": 55}

    summary_exact = results.get_prediction(**window).summary_frame()

    pred_sim = results.get_prediction(
        simulate_repetitions=int(1e6),
        random_state=11,
        method="simulated",
        **window,
    )
    summary_sim = pred_sim.summary_frame()

    # the simulated mean must agree with the numerically computed mean
    assert_allclose(
        summary_sim["mean"].values,
        summary_sim["mean_numerical"].values,
        rtol=1e-3,
        atol=1e-3,
    )

    import matplotlib.pyplot as plt

    plt.switch_backend("TkAgg")
    # draw the first 1000 simulated paths as a faint background
    simulated_paths = pred_sim._results.simulation_results
    for column in range(1000):
        plt.plot(simulated_paths.iloc[:, column], color="grey", alpha=0.1)
    plt.plot(oildata[40:], "-", label="data")
    plt.plot(summary_exact["mean"], "--", label="mean")
    for bound in ("pi_lower", "pi_upper"):
        side = bound.split("_")[1]
        plt.plot(summary_sim[bound], ":", label=f"sim {side}")
        plt.plot(summary_exact[bound], ".-", label=f"exact {side}")
    # plt.legend()
    plt.show()

    # simulated and exact prediction intervals must be equal
    for bound in ("pi_lower", "pi_upper"):
        assert_allclose(
            summary_sim[bound].values,
            summary_exact[bound].values,
            rtol=1e-4,
            atol=1e-4,
        )
def test_seasonal_periods(austourists):
    """Check seasonal period auto-deduction and rejection of an invalid period."""
    # test auto-deduction of period
    model = ETSModel(austourists, error="add", trend="add", seasonal="add")
    assert model.seasonal_periods == 4

    # An invalid seasonal period must raise. ``pytest.raises`` fails the test
    # when no error occurs, which a bare ``try/except: pass`` would let through.
    with pytest.raises(ValueError):
        ETSModel(austourists, seasonal="add", seasonal_periods=0)
def test_bounded_fit(oildata):
    """Pin ``smoothing_trend`` to 0.99 in three ways and compare the fits.

    The parameter is constrained via degenerate bounds, via the ``fix_params``
    context manager and via ``fit_constrained``; all three must report the
    fixed value and yield the same parameters.
    """
    model_options = dict(error="add", trend="add", damped_trend=True)

    # variant 1: lower bound == upper bound
    beta = [0.99, 0.99]
    bounded_model = ETSModel(
        oildata,
        bounds={"smoothing_trend": beta},
        **model_options,
    )
    fit1 = bounded_model.fit(disp=False)
    assert fit1.smoothing_trend == 0.99

    # variant 2: same using the fix_params semantic
    free_model = ETSModel(oildata, **model_options)
    with free_model.fix_params({"smoothing_trend": 0.99}):
        fit2 = free_model.fit(disp=False)
    assert fit2.smoothing_trend == 0.99
    assert_allclose(fit1.params, fit2.params)
    fit2.summary()  # the summary must render without failing

    # variant 3: using fit_constrained
    fit3 = free_model.fit_constrained({"smoothing_trend": 0.99})
    assert fit3.smoothing_trend == 0.99
    assert_allclose(fit1.params, fit3.params)
    fit3.summary()
def test_aicc_0_dof():
    """Regression test for GH8172: the AICc on a five-point series.

    The fitted model's ``aicc`` must be non-finite and positive.
    """
    observations = [109.0, 101.0, 104.0, 90.0, 105.0]
    known_initial_state = {
        "initialization_method": 'known',
        "initial_level": 100.0,
        "initial_trend": 0.0,
    }

    fitted = ETSModel(
        endog=observations,
        error='add',
        trend='add',
        damped_trend=True,
        **known_initial_state,
    ).fit()
    aicc = fitted.aicc

    assert not np.isfinite(aicc)
    assert aicc > 0
def test_initialization_known(austourists):
    """Check that known initial states end up in the internal parameter vector.

    Positions 4 and 5 of the internal parameters must hold the supplied
    initial level and trend; position 6 must be zero.
    """
    initial_level = 36.46466837
    initial_trend = 34.72584983

    model = ETSModel(
        austourists,
        error="add",
        trend="add",
        damped_trend=True,
        initialization_method="known",
        initial_level=initial_level,
        initial_trend=initial_trend,
    )
    start_params = model._start_params
    internal_params = model._internal_params(start_params)

    assert initial_level == internal_params[4]
    assert initial_trend == internal_params[5]
    assert internal_params[6] == 0
def oildata_model(oildata):
    """Build an additive-error, damped additive-trend ETS model on ``oildata``."""
    options = {"error": "add", "trend": "add", "damped_trend": True}
    model = ETSModel(oildata, **options)
    return model
def test_convergence_simple():
    """Issue 6883: Holt-Winters and ETS must agree on a simulated series."""
    rng = np.random.RandomState(0)
    shocks = rng.standard_normal(12000)

    # recursive simulation: each value depends on the previous value and shock
    y = shocks.copy()
    n_obs = shocks.shape[0]
    for t in range(1, n_obs):
        y[t] = y[t - 1] - 0.2 * shocks[t - 1] + shocks[t]
    y = y[200:]  # drop the first 200 simulated values

    holt_winters = holtwinters.ExponentialSmoothing(
        y, initialization_method="estimated"
    )
    res = holt_winters.fit()
    ets_res = ETSModel(y).fit()
    tolerance = {"rtol": 1e-4, "atol": 1e-4}

    # the smoothing level should be very similar, the initial state might be
    # different as it doesn't influence the final result too much
    assert_allclose(
        res.params["smoothing_level"],
        ets_res.smoothing_level,
        **tolerance,
    )

    # the first few values are influenced by differences in initial state, so
    # they are left out of the comparison
    assert_allclose(
        res.fittedvalues[10:],
        ets_res.fittedvalues[10:],
        **tolerance,
    )
def test_estimated_initialization_short_data(oildata, trend, seasonal, nobs):
    """GH 7319: estimated initialization on short data must not yield NaN params."""
    res = ETSModel(oildata[:nobs],
                   trend=trend,
                   seasonal=seasonal,
                   seasonal_periods=4,
                   initialization_method='estimated').fit()
    # ``not`` instead of ``~``: bitwise inversion of a plain Python bool gives
    # -1 / -2, which are both truthy, so the assertion could never fail.
    assert not np.any(np.isnan(res.params))
def austourists_model(austourists):
    """Build a fully additive, damped ETS model with period 4 on ``austourists``."""
    options = {
        "seasonal_periods": 4,
        "error": "add",
        "trend": "add",
        "seasonal": "add",
        "damped_trend": True,
    }
    model = ETSModel(austourists, **options)
    return model
def test_seasonal_order(reset_randomstate, method):
    """Fit a purely periodic series and check the recovered seasonal pattern.

    The series repeats ``0, 1, ..., 11`` one hundred times. Initial level plus
    initial seasonal states must reproduce that pattern in order, and the fit
    must be essentially exact.
    """
    seasonal = np.arange(12.0)
    time_series = np.tile(seasonal, 100)

    model = ETSModel(
        time_series,
        seasonal="add",
        seasonal_periods=12,
        initialization_method=method,
    )
    res = model.fit()

    recovered_pattern = res.initial_seasonal + res.initial_level
    assert_allclose(recovered_pattern, seasonal, atol=1e-4, rtol=1e-4)
    assert res.mae < 1e-6
def test_convergence_simple():
    """Issue 6883: ExponentialSmoothing and ETS must agree on a simulated series."""
    rng = np.random.RandomState(0)
    shocks = rng.standard_normal(12000)

    # recursive simulation: each value depends on the previous value and shock
    y = shocks.copy()
    n_obs = shocks.shape[0]
    for t in range(1, n_obs):
        y[t] = y[t - 1] - 0.2 * shocks[t - 1] + shocks[t]
    y = y[200:]  # drop the first 200 simulated values

    res = ExponentialSmoothing(y).fit()
    ets_res = ETSModel(y).fit()
    decimals = 3

    # the smoothing level should be very similar, the initial state might be
    # different as it doesn't influence the final result too much
    assert_almost_equal(
        res.params['smoothing_level'], ets_res.smoothing_level, decimals
    )

    # the first few values are influenced by differences in initial state, so
    # they are left out of the comparison
    assert_almost_equal(
        res.fittedvalues[10:], ets_res.fittedvalues[10:], decimals
    )
def setup_model(
    request,
    austourists,
    oildata,
    ets_austourists_fit_results_R,
    ets_oildata_fit_results_R,
):
    """Build an ETS model together with the matching reference results from R.

    ``request.param`` holds ``(error, trend, seasonal, damped, dataset_name)``.
    The test is skipped when R has no result for the requested model.

    Returns
    -------
    tuple
        ``(model, params, results_R)``.
    """
    spec = request.param
    error, trend, seasonal, damped = spec[0:4]
    dataset_name = spec[4]

    # pick the data set, its seasonal period and the R reference results
    if dataset_name == "austourists":
        data, seasonal_periods = austourists, 4
        reference = ets_austourists_fit_results_R[damped]
    else:
        data, seasonal_periods = oildata, None
        reference = ets_oildata_fit_results_R[damped]

    name = short_model_name(error, trend, seasonal)
    if name not in reference:
        pytest.skip(f"model {name} not implemented or not converging in R")

    results_R = reference[name]
    params = get_params_from_R(results_R)

    model = ETSModel(
        data,
        seasonal_periods=seasonal_periods,
        error=error,
        trend=trend,
        seasonal=seasonal,
        damped_trend=damped,
    )
    return model, params, results_R
def test_initialization_heuristic(oildata):
    """Compare fitted values of estimated vs. heuristic initialization.

    This is mostly a smoke test, so the two fits are only required to be
    roughly similar after the first ten observations.
    """
    fitted_values = {}
    for method in ("estimated", "heuristic"):
        model = ETSModel(
            oildata,
            error="add",
            trend="add",
            damped_trend=True,
            initialization_method=method,
        )
        fitted_values[method] = model.fit(disp=False).fittedvalues.values

    yhat_estimated = fitted_values["estimated"]
    yhat_heuristic = fitted_values["heuristic"]

    # only check that the result is not totally off
    assert_allclose(yhat_estimated[10:], yhat_heuristic[10:], rtol=0.5)
Example #17
0
def Initial_Parameter_calculater(series_training, exogen,
                                 alpha, beta, gamma, omega, epsilon, smoothing,
                                 yearly_seasonality, initialisation, days_before_events, days_after_events, minpvalue, mineffect):
    """Compute starting parameters for the optimisation of an ETS model with event effects.

    Two strategies are available, selected by ``initialisation``:

    ``'Standard'``
        Heuristic following Forecasting with Exponential Smoothing
        (Hyndman et al. 2008), p. 23-24.

        * Weekly seasonality: a 7-lag moving average followed by a 2-lag
          moving average is used to detrend (by division) the first year of
          data; the detrended values are averaged per weekday and normalised
          so that the seven indices sum to 7.
        * Level and slope: a linear regression on a time trend is fitted to
          the first ten seasonally adjusted values. The intercept is the
          initial level; the slope coefficient divided by the mean of the
          first ten observations is the initial slope.
        * Exogenous effects: the series is regressed on ``exogen`` and the
          coefficients are divided by the mean of the series.
        * Yearly seasonality (optional): either Fourier-term regression
          coefficients or monthly means, both divided by the series mean.

    ``'ETS_Optimizing_days_around_events'``
        A statsmodels ``ETSModel`` (multiplicative error, damped additive
        trend, multiplicative 7-day seasonality) is fitted first and its
        parameters replace the ``alpha``/``beta``/``gamma``/``omega``
        arguments and provide level, slope and weekly seasonal start values.
        ``exogen`` is extended with "n days before/after" dummies for every
        event, the relative errors of the ETS fit are regressed (OLS) on the
        events (plus yearly terms if requested), insignificant or negligible
        event columns are dropped, and the regression is rerun.

    Parameters
    ----------
    series_training : pandas.Series
        The training time series, indexed by date.
    exogen : pandas.DataFrame
        Event indicator variables, one column per event, indexed by date.
    alpha, beta, gamma, omega : float
        Starting values of the smoothing/damping parameters. They are passed
        through in ``'Standard'`` mode and overwritten by the ETS fit in
        ``'ETS_Optimizing_days_around_events'`` mode.
    epsilon : float
        Extra parameter appended last when yearly seasonality is modelled
        and ``smoothing`` is falsy.
    smoothing : bool
        If truthy, the smoothing parameters are left out of the result.
    yearly_seasonality : str
        ``"fourier"``, ``"dummies"`` or anything else for no yearly terms.
    initialisation : str
        ``'Standard'`` or ``'ETS_Optimizing_days_around_events'``.
    days_before_events, days_after_events : int
        Number of lead/lag dummies created per event (ETS mode only).
    minpvalue : float
        Event columns with an OLS p-value at or above this are dropped.
    mineffect : float
        Event columns with an absolute OLS coefficient at or below this are
        dropped.

    Returns
    -------
    tuple
        ``(Starting_Parameters, exogen)``: a flat numpy array of starting
        values and the (possibly extended and pruned) event frame.

    Raises
    ------
    ValueError
        If ``initialisation`` is not one of the two supported strategies.
    """
    yearly_initial = None

    if initialisation == 'Standard':

        # --- initial weekly seasonal component ---------------------------
        # 7-lag moving average followed by a 2-lag moving average
        moving_average = series_training[:371].rolling(window=7).mean()
        moving_average = moving_average.rolling(window=2).mean()

        # Detrending for the multiplicative model. The first 7 values are
        # skipped because they only start up the moving average.
        detrended_series = series_training[7:371] / moving_average[7:]
        detrended_series.index = pd.to_datetime(detrended_series.index, format='%Y-%m-%d')

        # weekday of the first observation, needed to order the indices
        first_weekday = pd.to_datetime(series_training.index, format='%Y-%m-%d')[0].weekday()
        weekday_means = detrended_series.groupby(detrended_series.index.dayofweek).mean()

        # Entry i belongs to observation i, whose weekday is
        # (first_weekday + i) % 7. The former abs(first_weekday - i) lookup
        # repeated some weekdays and skipped others unless the series
        # started on a Monday.
        weekly_initial = np.array(
            [weekday_means.loc[(first_weekday + i) % 7] for i in range(7)])

        # normalise so that the indices add up to m = 7
        weekly_initial = weekly_initial * (7 / weekly_initial.sum())

        # --- initial level and slope -------------------------------------
        # first ten observations, seasonally adjusted (the weekly pattern is
        # repeated cyclically to cover ten days)
        adjusted_first_10 = (series_training.iloc[:10].to_numpy()
                             / np.resize(weekly_initial, 10))
        time_index = np.arange(10).reshape(-1, 1)
        reg = LinearRegression().fit(time_index, adjusted_first_10.reshape(-1, 1))

        # level = intercept; slope = coefficient relative to the series mean
        level_initial = reg.intercept_[0]
        slope_initial = reg.coef_[0] / mean(series_training[0:10])

        # --- initial values for the regressors ---------------------------
        reg2 = LinearRegression().fit(exogen[0:len(series_training)], series_training)

        # multiplicative effects: divide the coefficients by the series mean
        exogen_initial_parameters = reg2.coef_[0:exogen.shape[1]] / mean(series_training)

        # --- initial yearly seasonality ----------------------------------
        if yearly_seasonality == "fourier":
            yearly = _yearly_fourier_terms(
                pd.PeriodIndex(series_training.index, freq='D'))

            # weekday dummies control for the weekly seasonality
            Weekdays = ['Monday', 'Tuesday', 'Wensday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
            weekday_of_obs = np.asarray(yearly.index.dayofweek)
            for Weekdays_number, days in enumerate(Weekdays):
                yearly[days] = (weekday_of_obs == Weekdays_number).astype(float)

            # regress the first year on the 20 Fourier terms + 7 dummies
            reg3 = LinearRegression().fit(yearly[0:365], series_training[0:365])

            # relative (multiplicative) estimates; the 7 weekday coefficients
            # at the end are controls only and are dropped
            yearly_initial = (reg3.coef_ / mean(series_training))[:-7]

        elif yearly_seasonality == 'dummies':
            # monthly means relative to the overall mean
            yearly_initial = series_training.groupby(series_training.index.month).mean() / mean(series_training)

    elif initialisation == 'ETS_Optimizing_days_around_events':

        from statsmodels.tsa.exponential_smoothing.ets import ETSModel

        # Work on a real copy so the caller's series is not modified.
        series_copy = series_training.copy()

        # Non-positive days do not work with multiplicative components; they
        # are replaced by the mean of the neighbouring days. At the series
        # boundaries only the existing neighbour is used.
        last_position = len(series_copy) - 1
        for day in range(len(series_copy)):
            if series_copy.iloc[day] <= 0:
                neighbours = []
                if day > 0:
                    neighbours.append(series_copy.iloc[day - 1])
                if day < last_position:
                    neighbours.append(series_copy.iloc[day + 1])
                if neighbours:
                    series_copy.iloc[day] = sum(neighbours) / len(neighbours)

        # basic ETS model: its fit provides the non-event start values
        model = ETSModel(endog=series_copy, error="mul", trend="add", damped_trend=True, seasonal="mul", seasonal_periods=7)
        fit = model.fit()

        fitted_params = np.asarray(fit.params)
        alpha, beta, gamma, omega, level_initial, slope_initial = fitted_params[:6]
        weekly_initial = fitted_params[6:12 + 1]

        # relative model errors, used as the OLS response
        errors = (series_copy - fit.fittedvalues) / fit.fittedvalues

        # lead/lag dummies are both derived from the original event columns
        df_before = _event_offset_dummies(exogen, days_before_events, 'before', -1)
        df_after = _event_offset_dummies(exogen, days_after_events, 'after', 1)
        exogen = pd.merge(exogen, df_before, left_index=True, right_index=True)
        exogen = pd.merge(exogen, df_after, left_index=True, right_index=True)

        # yearly regressors that enter the OLS next to the events
        if yearly_seasonality == "fourier":
            yearly = _yearly_fourier_terms(pd.to_datetime(series_training.index))
        elif yearly_seasonality == 'dummies':
            # Twelve monthly dummies. The former list had no November entry,
            # which labelled November as December and left December without
            # a dummy. Existing labels are kept verbatim.
            Months = ['January', 'February ', 'March', 'April ', 'May', 'June', 'July',
                      'August', 'September', 'October', 'November', 'December']
            dates = pd.to_datetime(series_training.index)
            month_of_obs = np.asarray(dates.month)
            yearly = pd.DataFrame(
                {Month: (month_of_obs == Month_number).astype(float)
                 for Month_number, Month in enumerate(Months, start=1)},
                index=dates)
        else:
            yearly = None

        exogen, exogen_initial_parameters, yearly_initial = _fit_event_effects(
            errors, exogen, yearly, minpvalue, mineffect)

    else:
        raise ValueError(
            f"Unknown initialisation {initialisation!r}; expected 'Standard' "
            "or 'ETS_Optimizing_days_around_events'.")

    Starting_Parameters = _assemble_starting_parameters(
        yearly_seasonality in ("fourier", "dummies"), smoothing,
        alpha, beta, gamma, omega, epsilon,
        level_initial, slope_initial, weekly_initial,
        exogen_initial_parameters, yearly_initial)

    return Starting_Parameters, exogen


def _yearly_fourier_terms(index, n_terms=10):
    """Return a frame of ``n_terms`` sin/cos pairs of the day of year, indexed by ``index``."""
    day_of_year = np.asarray(index.dayofyear)
    columns = {}
    for k in range(1, n_terms + 1):
        suffix = '' if k == 1 else f'_{k}'
        angle = 2 * k * np.pi * day_of_year / 365.25
        # sin and cos alternate so the column order stays sin1, cos1, sin2, ...
        columns[f'yearly_sin365{suffix}'] = np.sin(angle)
        columns[f'yearly_cos365{suffix}'] = np.cos(angle)
    return pd.DataFrame(columns, index=index)


def _event_offset_dummies(exogen, n_days, label, direction):
    """Return 0/1 dummies marking the days 1..n_days before (direction=-1) or after (direction=1) each event."""
    columns = {}
    for position, event in enumerate(exogen.columns):
        event_flags = exogen.iloc[:, position]
        for d in range(1, n_days + 1):
            # shift() pads with NaN, which compares unequal to 1, so days
            # without a counterpart inside the series become 0 instead of
            # wrapping around to the other end of the series.
            shifted = event_flags.shift(direction * d)
            columns[f'{d}_days_{label}_{event}'] = (shifted == 1).to_numpy().astype(float)
    return pd.DataFrame(columns, index=exogen.index.rename('date'))


def _fit_event_effects(errors, exogen, yearly, minpvalue, mineffect):
    """Regress ``errors`` on the events (and yearly terms), drop weak events, refit and split the coefficients."""

    def fit_ols(events):
        design = events[0:len(errors)]
        if yearly is not None:
            design = pd.merge(design, yearly[0:len(errors)],
                              left_index=True, right_index=True)
        return sm.OLS(errors, design).fit()

    results = fit_ols(exogen)

    # only the event columns (the leading ones) are candidates for removal
    pvalues = np.asarray(results.pvalues)
    effects = np.asarray(results.params)
    to_delete = [i for i in range(exogen.shape[1])
                 if pvalues[i] >= minpvalue or abs(effects[i]) <= mineffect]
    exogen = exogen.drop(columns=exogen.columns[to_delete])

    # rerun the regression on the remaining events
    results = fit_ols(exogen)
    if yearly is None:
        return exogen, results.params, None
    n_events = exogen.shape[1]
    return exogen, results.params[0:n_events], results.params[n_events:]


def _assemble_starting_parameters(with_yearly, smoothing,
                                  alpha, beta, gamma, omega, epsilon,
                                  level_initial, slope_initial, weekly_initial,
                                  exogen_initial_parameters, yearly_initial):
    """Flatten all start values into one array in the order used by the optimiser."""
    states = [level_initial, slope_initial, weekly_initial, exogen_initial_parameters]
    if with_yearly:
        states.append(yearly_initial)

    if smoothing:
        parts = states
    elif with_yearly:
        # NOTE(review): omega precedes gamma here but follows it in the
        # branch without yearly seasonality; the existing order is kept
        # because the consumer of this array is not visible here.
        parts = [alpha, beta, omega, gamma] + states + [epsilon]
    else:
        parts = [alpha, beta, gamma, omega] + states
    return np.concatenate(parts, axis=None)
Example #18
0
def exp_smoothing(y_train,
                  y_test,
                  forecast_horizon=1,
                  limit_steps=False,
                  pi_alpha=1.96,
                  online=True):
    """
    Train an exponential smoothing timeseries model (ETS) and forecast the test period.

    For every admissible forecast origin ``i`` in the test period an ETS model
    (additive error, damped additive trend, additive seasonality) is asked for
    a ``forecast_horizon``-step prediction starting at ``y_test.index[i]``.
    The individual origins are evaluated in parallel with joblib.

    Parameters
    ----------
    y_train : pandas.Series or pandas.DataFrame
        The train values of the target variable.
    y_test : pandas.Series or pandas.DataFrame
        The test values of the target variable. Its index defines the forecast
        windows and, if ``online`` is True, its values are appended to the
        training data as the forecast origin moves forward.
    forecast_horizon : int, default = 1
        Number of future steps to be forecasted from each origin.
    limit_steps : int, default = False
        Limits the number of forecast origins. If False (or 0), every position
        of ``y_test`` is a candidate origin. Origins whose window would run
        past the end of ``y_test`` are always dropped.
    pi_alpha : float, default = 1.96
        Kept for interface compatibility; it is currently NOT used. The
        returned interval bounds are the ``pi_lower``/``pi_upper`` columns of
        statsmodels' ``summary_frame()`` called with its default settings.
    online : bool, default = True
        If True the model is re-fitted for every origin ``i`` on ``y_train``
        extended by the first ``i`` test observations. If False the model is
        fitted once on ``y_train`` and reused for all origins.

    Returns
    -------
    ndarray
        Expected forecast values, one row per forecast origin.
    ndarray
        The upper interval bounds for the forecasts, one row per forecast origin.
    ndarray
        The lower interval bounds for the forecasts, one row per forecast origin.

    Raises
    ------
    ValueError
        If no forecast window of length ``forecast_horizon`` fits into ``y_test``.
    """
    import tempfile  # local import: only needed for joblib's scratch folder

    print('Train an exponential smoothing timeseries model (ETS)...')

    # Work out the forecast origins first so that an impossible request fails
    # immediately with a clear message instead of after an expensive fit.
    test_period = range(limit_steps) if limit_steps else range(len(y_test))
    origins = [i for i in test_period if i + forecast_horizon <= len(y_test)]
    if not origins:
        raise ValueError(
            f"No forecast window fits into y_test: len(y_test)={len(y_test)}, "
            f"forecast_horizon={forecast_horizon}, limit_steps={limit_steps}."
        )

    # Leave two cores free for the rest of the system, but use at least one.
    num_cores = max(multiprocessing.cpu_count() - 2, 1)

    def fit_ets(series):
        # Single place that defines the model structure used for every fit.
        return ETSModel(series,
                        error="add",
                        trend="add",
                        damped_trend=True,
                        seasonal="add",
                        dates=series.index).fit()

    # Without online re-fitting one fit on the training data serves all origins.
    # With online re-fitting every origin fits its own model, so no shared fit
    # is needed.
    static_fit = None if online else fit_ets(y_train)

    def ets_predict(i):
        if online:
            # extend the train-series with observed values as we move forward in the prediction horizon
            # to achieve a receding window prediction
            window_fit = fit_ets(pd.concat([y_train, y_test.iloc[0:i]]))
        else:
            # A separate local name is used on purpose: assigning to a name
            # shared with the enclosing scope would make it local here and
            # leave it unbound whenever ``online`` is False.
            window_fit = static_fit
        # There are several different ETS methods available:
        #  - forecast: makes out of sample predictions
        #  - predict: in sample and out of sample predictions
        #  - simulate: runs simulations of the statespace model
        #  - get_prediction: in sample and out of sample predictions, as well as prediction intervals
        # with: method = 'simulated', simulate_repetitions=100 we can simulate the PI's
        pred = window_fit.get_prediction(
            start=y_test.index[i],
            end=y_test.index[i + forecast_horizon - 1],
        ).summary_frame()
        return pred["mean"], pred["pi_upper"], pred["pi_lower"]

    # Never request more workers than there are windows to evaluate.
    expected_value, fc_u, fc_l = zip(
        *Parallel(n_jobs=min(num_cores, len(origins)),
                  mmap_mode='c',
                  temp_folder=tempfile.gettempdir())(
            delayed(ets_predict)(i) for i in origins))

    print('Training and validating ETS model completed.')
    return np.asarray(expected_value), np.asarray(fc_u), np.asarray(fc_l)
Example #19
0
    525.9509,
    549.8338,
    542.3405,
]
# One observation per year, stamped at the start of the year. "YS" is the
# year-start alias; the older "AS" spelling is deprecated in recent pandas.
oil = pd.Series(oildata, index=pd.date_range("1965", "2013", freq="YS"))
oil.plot()
plt.ylabel("Annual oil production in Saudi Arabia (Mt)")

# The plot above shows annual oil production in Saudi Arabia in million
# tonnes. The data are taken from the R package `fpp2` (companion package to
# prior version [1]).
# Below you can see how to fit a simple exponential smoothing model to this
# data using statsmodels' ETS implementation. Additionally, the fit obtained
# with `forecast` in R is shown for comparison.

# ETSModel is used with its default configuration here.
model = ETSModel(oil)
fit = model.fit(maxiter=10000)
oil.plot(label="data")
fit.fittedvalues.plot(label="statsmodels fit")
plt.ylabel("Annual oil production in Saudi Arabia (Mt)")

# Parameter values obtained from R; the same model is evaluated at these
# values (no optimization) so both fits can be drawn in one figure.
params_R = [
    0.99989969, 0.11888177503085334, 0.80000197, 36.46466837, 34.72584983
]
yhat = model.smooth(params_R).fittedvalues
yhat.plot(label="R fit", linestyle="--")

plt.legend()

# By default the initial states are considered to be fitting parameters