Esempio n. 1
0
    def score(self, y_test, fh=None, X=None):
        """Evaluate this forecaster's predictions against `y_test` using sMAPE.

        Parameters
        ----------
        y_test : pd.Series
            Observed target series that the forecasts are compared against.
        fh : int, list or array-like, optional (default=None)
            Forecasting horizon, i.e. the steps ahead to predict.
        X : pd.DataFrame, shape=[n_obs, n_vars], optional (default=None)
            Optional 2-d dataframe of exogenous variables.

        Returns
        -------
        score : float
            sMAPE loss of ``self.predict(fh=fh, X=X)`` with respect to
            ``y_test``.

        See Also
        --------
        :meth:`sktime.performance_metrics.forecasting.smape_loss`
        """
        # The metric is imported inside the method rather than at module level.
        from sktime.performance_metrics.forecasting import smape_loss

        # Input validation is left to ``predict`` and to the loss function.
        y_pred = self.predict(fh=fh, X=X)
        return smape_loss(y_test, y_pred)
Esempio n. 2
0
def regression_performance(predicted, actual):
    """Summarise prediction quality as a dictionary of error metrics.

    Parameters
    ----------
    predicted : array-like
        Model predictions (presumably a 1-d numpy array or pandas Series of
        the same length as `actual` -- confirm against callers).
    actual : array-like
        Observed values. They are the denominator of the percentage based
        metrics ("mape", "wmape", "mpe").

    Returns
    -------
    dict
        Keys, in order: "mape", "wmape", "me", "mae", "mpe", "rmse", "corr",
        "r2" and "smape_loss". "mape", "wmape", "mpe" and "smape_loss" are
        scaled by 100.
    """
    # All error based metrics share the same residual, so compute it once.
    residual = predicted - actual
    abs_residual = np.abs(residual)

    report = {}
    report["mape"] = np.mean(abs_residual / np.abs(actual)) * 100
    report["wmape"] = sum(abs_residual) / sum(np.abs(actual)) * 100
    report["me"] = np.mean(residual)
    report["mae"] = np.mean(abs_residual)
    report["mpe"] = np.mean(residual / actual) * 100
    report["rmse"] = np.mean(residual ** 2) ** 0.5
    report["corr"] = np.corrcoef(predicted, actual)[0, 1]
    report["r2"] = r2_score(actual, predicted)
    report["smape_loss"] = (
        smape_loss(pd.Series(actual), pd.Series(predicted)) * 100
    )
    return report
Esempio n. 3
0
def test_smape_loss(test_y):
    """Check ``smape_loss`` values and symmetry for scaled copies of a series.

    `test_y` is the series under test (presumably supplied by a pytest
    fixture or parametrisation defined elsewhere).
    """
    y_test = test_y

    # A series compared with itself has zero loss.
    assert smape_loss(y_test, y_test) == pytest.approx(0.0)
    assert smape_loss(y_test, y_test) == pytest.approx(0.0)

    # (scaling factor, expected loss); each case is checked in both
    # argument orders.
    scaled_cases = (
        (1.1, 0.095238095238),
        (1.000001, 0.000001),
        (2.0, 0.6666666),
    )
    for factor, expected in scaled_cases:
        y_pred = y_test * factor
        assert smape_loss(y_test, y_pred) == pytest.approx(expected)
        assert smape_loss(y_pred, y_test) == pytest.approx(expected)

    # For a large scaling only the symmetry is asserted, not the value.
    y_pred = y_test * 100
    assert smape_loss(y_test, y_pred) == smape_loss(y_pred, y_test)
Esempio n. 4
0
def test_dirrec_against_recursive_accumulated_error():
    """Check that the dirrec reduction scores better than the recursive one.

    Both strategies wrap the same ``LinearRegression`` estimator and are
    fitted on the airline data with the last 24 observations held out. The
    dirrec strategy is expected to accumulate less error over the horizon and
    therefore reach a lower sMAPE.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    estimator = LinearRegression()
    forecasters = {
        strategy: make_reduction(estimator,
                                 scitype="tabular-regressor",
                                 strategy=strategy)
        for strategy in ("recursive", "dirrec")
    }

    losses = {}
    for strategy, forecaster in forecasters.items():
        preds = forecaster.fit(y_train, fh=fh).predict(fh)
        losses[strategy] = smape_loss(y_test, preds)

    assert losses["dirrec"] < losses["recursive"]
Esempio n. 5
0
def calculate_smape(df_, regressor, forecast_horizon, window_length):
    """Back-test a reduced-regression forecaster on the last column of `df_`.

    The last column is forward-filled, re-indexed from zero and split so that
    its final 12 observations form the test set. A recursive
    ``ReducedRegressionForecaster`` is fitted on the remainder and scored on
    the held-out part.

    Parameters
    ----------
    df_ : pd.DataFrame
        Input data; only the last column is forecast. The caller's frame is
        not modified.
    regressor : object
        Key passed to ``select_regressor`` to obtain the underlying regressor.
    forecast_horizon : object
        Currently unused; the test window is fixed at 12 observations.
    window_length : int
        Window length handed to ``ReducedRegressionForecaster``.

    Returns
    -------
    float
        sMAPE loss of the forecast over the 12 held-out observations.
    """
    # ``DataFrame.ffill`` returns a new frame, so no explicit copy is needed
    # to protect the caller's data. It replaces ``fillna(method='ffill')``,
    # whose ``method`` argument is deprecated in recent pandas releases.
    df = df_.ffill()
    y = df.iloc[:, -1].reset_index(drop=True)
    y_train, y_test = temporal_train_test_split(y, test_size=12)

    # Relative horizon 1..len(y_test).
    fh = np.arange(1, y_test.shape[0] + 1)

    forecaster = ReducedRegressionForecaster(
        regressor=select_regressor(regressor),
        window_length=window_length,
        strategy='recursive')
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.predict(fh)

    # Arguments follow the (y_test, y_pred) order used by the other call
    # sites of smape_loss.
    return smape_loss(y_test, y_pred)
Esempio n. 6
0
def test_score(Forecaster, fh):
    """Check that ``score`` equals sMAPE computed from ``predict`` directly.

    Two separately constructed and fitted instances of `Forecaster` are used:
    one yields the reference value through ``predict`` and ``smape_loss``,
    the other is asked for its ``score``.
    """
    # Zero-based positions of the horizon inside the test series.
    fh_idx = check_fh(fh).to_indexer()
    y_true = y_test.iloc[fh_idx]

    def _fitted_instance():
        forecaster = _construct_instance(Forecaster)
        forecaster.fit(y_train, fh=fh)
        return forecaster

    # Reference value from the raw predictions.
    expected = smape_loss(_fitted_instance().predict(), y_true)

    # Value reported by the forecaster's own score method.
    actual = _fitted_instance().score(y_true, fh=fh)
    assert actual == expected
def load_forecast(
        data,
        model_path='Models/6689489_NaiveForecaster_2017-01_2019-04_5.model'):
    """
    Load a saved forecasting model, score it on its hold-out period and plot.

    The data preparation settings are recovered from the model file name,
    which must follow the pattern
    ``<customer_id>_<model_type>_<start>_<end>_<test_size>.<extension>``.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    model_path: .model file
        path to previously saved model

    Returns
    -------
    plot: matplotlib figure
        plot of train, test and predicted values (the sMAPE loss is printed,
        not returned)

    Raises
    ------
    ValueError
        If the file name does not contain the five underscore separated
        fields described above, or a numeric field is not an integer.
    """
    import os

    # Parse the file name only: directory names may contain underscores,
    # which would otherwise corrupt the customer id taken from the front.
    name_parts = os.path.basename(model_path).split('_')
    if len(name_parts) < 5:
        raise ValueError(
            f'Cannot parse model settings from file name: {model_path!r}')
    customer_id = int(name_parts[0])
    # The trailing fields are indexed from the end so that a model type
    # containing underscores does not shift them.
    start, end = name_parts[-3], name_parts[-2]
    test_size = int(name_parts[-1].split('.')[0])

    series = prepare_data(data, customer_id, start=start, end=end)
    y_train, y_test = temporal_train_test_split(series, test_size=test_size)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    # NOTE(review): `load` presumably deserialises a pickled estimator
    # (e.g. joblib) -- only load model files from trusted sources.
    f = load(model_path)

    y_pred = f.predict(fh)

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Esempio n. 8
0
    def calculate_smape(df_, regressor, forecast_horizon, window_length):
        """Back-test one forecaster on every column of `df_` and plot 'total'.

        Each column is split so that its last 12 observations form the test
        set; the forecaster is fitted on the rest and predicts the test
        window. The forecast and the actual values of the 'total' column are
        drawn with matplotlib and rendered through Streamlit.

        Parameters
        ----------
        df_ : pd.DataFrame
            One series per column; must contain a column named 'total'.
        regressor : str
            Key passed to ``select_regressor``. 'Naive', 'Theta',
            'Exp_Smoothing' and 'TBATS' are used as forecasters directly;
            anything else is wrapped in a recursive
            ``ReducedRegressionForecaster``.
        forecast_horizon : object
            Not used by this function.
        window_length : int
            Window length for the reduced-regression wrapper.

        Returns
        -------
        pd.DataFrame
            sMAPE per column in percent, rounded to one decimal and indexed
            by column name.
        """
        frame = df_.copy()

        used_directly = regressor in (
            'Naive', 'Theta', 'Exp_Smoothing', 'TBATS')
        model = select_regressor(regressor)
        if used_directly:
            forecaster = model
        else:
            forecaster = ReducedRegressionForecaster(
                regressor=model,
                window_length=window_length,
                strategy='recursive')

        predicted_parts = []
        actual_parts = []
        for name in frame.columns:
            series = frame.iloc[:, frame.columns.get_loc(name)].reset_index(
                drop=True)
            train_part, test_part = temporal_train_test_split(
                series, test_size=12)
            # Relative horizon 1..len(test_part).
            horizon = np.arange(test_part.shape[0]) + 1
            forecaster.fit(train_part, fh=horizon)
            predicted_parts.append(forecaster.predict(horizon))
            actual_parts.append(test_part)

        dn_forecast = pd.concat(predicted_parts, axis=1)
        dn_test = pd.concat(actual_parts, axis=1)
        # Give the forecasts the same column labels as the actual values.
        dn_forecast.columns = dn_test.columns.tolist()

        fig, ax = plt.subplots(
            1, 1, figsize=(15, 6), facecolor='w', edgecolor='k')
        fig.subplots_adjust(hspace=.5, wspace=.001)

        results = [
            round(100 * smape_loss(dn_forecast[column], dn_test[column]), 1)
            for column in dn_test
        ]

        ax.plot(dn_forecast['total'], 'o-', color='orange', label="predicted")
        ax.plot(dn_test['total'], 'o-', color='blue', label="actual")
        ax.set_title(
            'Testing the performance: last 12 month actual vs forecast')
        ax.legend()
        st.pyplot(fig)
        return pd.DataFrame(results).set_index(dn_test.columns)
Esempio n. 9
0
def forecast_performance(forecast, actual):
    """Return a dictionary of accuracy metrics for a forecast.

    Parameters
    ----------
    forecast : array-like
        Forecast values (presumably a 1-d numpy array or pandas Series of the
        same length as `actual` -- confirm against callers).
    actual : array-like
        Observed values. They are the denominator of the percentage based
        metrics ("mape", "wmape", "mpe").

    Returns
    -------
    dict
        Keys, in order: "mape", "wmape", "me", "mae", "mpe", "rmse", "corr"
        and "smape_loss". "mape", "wmape", "mpe" and "smape_loss" are scaled
        by 100.
    """
    # Shared intermediate quantities.
    diff = forecast - actual
    abs_diff = np.abs(diff)
    abs_actual = np.abs(actual)

    mape = np.mean(abs_diff / abs_actual) * 100
    wmape = sum(abs_diff) / sum(abs_actual) * 100
    me = np.mean(diff)
    mae = np.mean(abs_diff)
    mpe = np.mean(diff / actual) * 100
    rmse = np.mean(diff ** 2) ** 0.5
    corr = np.corrcoef(forecast, actual)[0, 1]
    smape_pct = smape_loss(pd.Series(actual), pd.Series(forecast)) * 100

    return dict(
        mape=mape,
        wmape=wmape,
        me=me,
        mae=mae,
        mpe=mpe,
        rmse=rmse,
        corr=corr,
        smape_loss=smape_pct,
    )
Esempio n. 10
0
def main():
    """Fit several forecasters on the airline data, then plot and score them.

    The last 36 months are held out for testing. A seasonal naive forecaster,
    AutoARIMA, exponential smoothing, a Theta forecaster and an ensemble of
    the four are fitted on the remaining data. Their forecasts are plotted
    together with the train and test series, and the sMAPE of each forecast
    is printed as a percentage.
    """
    # Univariate, monthly records from 1949 to 1960 (144 records).
    df = datasets.load_airline()
    # Keep the last 36 months for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Relative forecasting horizon with the same length as y_test.
    fh = np.arange(1, len(y_test) + 1)

    def exp_smoothing():
        # `damped` is a boolean flag; the previous string 'True' only worked
        # because any non-empty string is truthy.
        return ExponentialSmoothing(trend='add',
                                    damped=True,
                                    seasonal='multiplicative',
                                    sp=12)

    # (plot label, label used in the printed report, forecaster).
    # sp=12 is a yearly season on monthly data; NaiveForecaster strategies
    # are: last, mean, seasonal_last.
    candidates = [
        ('Naive Forecaster', 'NaiveForecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing', exp_smoothing()),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing', exp_smoothing()),
             ('Theta', ThetaForecaster(sp=12)),
         ])),
    ]

    predictions = []
    for _, _, forecaster in candidates:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_ys(y_train,
            y_test,
            *predictions,
            labels=['Train', 'Test'] + [label for label, _, _ in candidates])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (_, report_name, _), y_pred in zip(candidates, predictions):
        # Scale before rounding: `100 * round(x, 3)` printed floating point
        # artefacts such as 14.499999999999998.
        print(f'SMAPE Error for {report_name} is:',
              round(100 * smape_loss(y_test, y_pred), 1), '%')
Esempio n. 11
0
# Exploratory script (reads like a notebook export): `fh`, `y_train` and
# `y_test` are defined outside this excerpt. Bare expressions only display
# their value in an interactive session.
fh


# Exponential smoothing with additive trend and additive seasonality, sp=12.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)

ets_frcstr.fit(y_train)



# Forecast over `fh` and plot the result next to the train and test series.
y_pred = ets_frcstr.predict(fh)
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])

# Inspect the fitted parameters and the constructor parameters.
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()

# sMAPE of the exponential smoothing forecast on the test series.
smape_loss(y_test, y_pred)

# NOTE(review): AutoETS is fitted on `y_pred` (the forecast above), not on
# `y_train` -- this looks unintended; confirm.
auto_ets_frr = AutoETS()
auto_ets_frr.fit(y_pred)


auto_ets_frr.summary()

# NOTE(review): the AutoARIMA instance is immediately replaced by a default
# ARIMA; neither is fitted in this excerpt.
arima_frr = AutoARIMA()
arima_frr = ARIMA()

# ARIMA with order (1, 1, 0) and seasonal order (0, 1, 0, 12); it is only
# constructed here, not fitted.
forecaster = ARIMA(
    order=(1, 1, 0), seasonal_order=(0, 1, 0, 12), suppress_warnings=True
)

Esempio n. 12
0
# Notebook export: `train`, `test`, `fh`, `forecaster` and
# `forecaster_param_grid` used below come from cells outside this excerpt
# (the `forecaster_param_grid` assigned further down belongs to a later cell).

# Sliding-window cross-validation whose initial window is half of `train`,
# used to grid-search the forecaster's parameters.
cv = SlidingWindowSplitter(initial_window=int(len(train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster,
                               cv=cv,
                               param_grid=forecaster_param_grid)
gscv.fit(train)
y_pred = gscv.predict(fh)

# In[43]:

# Show the parameter combination selected by the grid search.
gscv.best_params_

# In[42]:

# Plot the tuned forecast against train/test and show its sMAPE.
plot_ys(train, test, y_pred, labels=["y_train", "y_test", "y_pred"])
smape_loss(test, y_pred)

# ### Tune Forecaster & Reduced Regressor

# In[51]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# In[52]:

# Search grids: one for the forecaster (window length) and one for the
# wrapped regressor (number of estimators).
forecaster_param_grid = {"window_length": [5, 10, 15]}
regressor_param_grid = {"n_estimators": [100, 200, 300]}

# In[53]:
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Build a forecasting model for one customer and time interval, save the
    fitted model and plot the result.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of months excluded from the end of the interval and used as
        the test dataset

    model_storage_path: string
        folder in which the fitted model is stored; the default '' stores it
        in the current working directory

    Returns
    -------
    plot: matplotlib figure
        plot of train, test and predicted values (the sMAPE loss is printed,
        not returned)

    Raises
    ------
    ValueError
        If `model_type` is not one of the supported names.
    """
    import os

    def _knn_reduction():
        return ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive")

    def _smoothing_ensemble():
        return EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])

    # Zero-argument factories, so only the requested model is constructed.
    factories = {
        'NaiveForecaster':
            lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
            lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
            lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
            _knn_reduction,
        'ExponentialSmoothing':
            lambda: ExponentialSmoothing(trend="add",
                                         seasonal="multiplicative",
                                         sp=12),
        'AutoETS':
            lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
            lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
            lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
            lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
            _smoothing_ensemble,
    }
    # Fail early with a clear message; previously an unknown name only
    # surfaced later as an unbound `forecaster` variable.
    if model_type not in factories:
        raise ValueError(
            f'Unknown model_type {model_type!r}; '
            f'expected one of {sorted(factories)}')

    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    forecaster = factories[model_type]()

    try:
        forecaster.fit(y_train)
    except Exception:
        # Retry on the series shifted by +1 -- presumably for models that
        # reject non-positive values (e.g. multiplicative seasonality).
        # NOTE(review): predictions of a model fitted this way are offset by
        # +1 relative to y_test; confirm whether that is intended.
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)

    # os.path.join keeps the default '' relative to the working directory;
    # plain string formatting produced '/<file>', i.e. the filesystem root.
    model_file = os.path.join(
        model_storage_path,
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model')
    dump(forecaster, model_file)

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Esempio n. 14
0
def genforecast(data):
    """Back-test four forecasters on `data` and forecast five steps ahead.

    `data` is split with ``temporal_train_test_split`` (default test size).
    A drift naive forecaster, a linear trend forecaster, an ensemble of three
    exponential smoothing variants and a grid-searched reduced-regression
    forecaster are each fitted on the training part and scored with sMAPE on
    the test part, then refitted on all of `data` to predict the next five
    steps.

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)``. The ``min_*``/``max_*`` values are the extremes of the
        five-step forecast rounded to 2 decimals, ``testct`` is the length of
        the test set and the ``*_acc`` values are sMAPE losses rounded to 4
        decimals.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.performance_metrics.forecasting import smape_loss

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)

    def _backtest_and_project(model):
        """Predict the test window, then refit on all data and look ahead."""
        model.fit(y_train)
        holdout_pred = model.predict(fh)
        # Full-data refit followed by a five-step-ahead forecast.
        model.fit(data)
        future = model.predict(np.arange(1, 6))
        return holdout_pred, round(min(future), 2), round(max(future), 2)

    # --- naive forecaster with drift ---
    y_pred_naive, min_naive, max_naive = _backtest_and_project(
        NaiveForecaster(strategy="drift"))
    naive_acc = round(smape_loss(y_pred_naive, y_test), 4)

    # --- linear trend ---
    y_pred_poly, min_poly, max_poly = _backtest_and_project(
        PolynomialTrendForecaster(degree=1))
    poly_acc = round(smape_loss(y_pred_poly, y_test), 4)

    # --- ensemble of exponential smoothing variants ---
    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(math.floor(len(y_test) / 4), 12))
    smoothing_ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval)),
    ])
    y_pred_ensem, min_ensem, max_ensem = _backtest_and_project(
        smoothing_ensemble)
    ensem_acc = round(smape_loss(y_test, y_pred_ensem), 4)

    # --- reduced regression tuned by temporal cross-validation ---
    knn_reduction = ReducedRegressionForecaster(
        regressor=KNeighborsRegressor(n_neighbors=1),
        window_length=15,
        strategy="recursive")
    # The forecaster is fitted on the initial window and temporal
    # cross-validation then selects the best window length.
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(knn_reduction,
                                   cv=cv,
                                   param_grid={"window_length": [5, 10, 15]})
    y_pred_redreg, min_redreg, max_redreg = _backtest_and_project(gscv)
    redreg_acc = round(smape_loss(y_test, y_pred_redreg), 4)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)
Esempio n. 15
0
    * 我们预测在同一季节观察到的最后一个值
''')
# Manual baseline: repeat the last observed training value over the horizon.
y_pred = np.repeat(y_train.iloc[-1], len(fh))
y_pred = pd.Series(y_pred, index=y_train.index[-1] + fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()

st.write('''
    (2) 使用sktime
''')
# Same baseline through sktime's NaiveForecaster.
forecaster = NaiveForecaster(strategy="last")
forecaster.fit(y_train)
y_last = forecaster.predict(fh)
# Plot the sktime forecast (y_last); the manual y_pred was plotted here
# before, so the forecaster's own output was never shown.
plot_ys(y_train, y_test, y_last, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_last, y_test):", smape_loss(y_last, y_test))

# Seasonal naive forecast: last value observed in the same season (sp=12).
forecaster = NaiveForecaster(strategy="seasonal_last", sp=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
# Report the loss of the seasonal forecast; the loss of y_last was repeated
# here before.
st.write("smape_loss(y_pred, y_test):", smape_loss(y_pred, y_test))

st.write('''
    ### 4 Forecasting with sktime
    
    ### 4.1 Reduction: from forecasting to regression
    
    从预测到回归sktime为此方法提供了一个元估算器,即:
    * 模块化并与scikit-learn兼容,因此我们可以轻松地应用任何scikit-learn回归器来解决我们的预测问题;