def score(self, y_test, fh=None, X=None):
    """Return the sMAPE loss of this forecaster's predictions against `y_test`.

    Parameters
    ----------
    y_test : pd.Series
        Target time series to which the forecasts are compared.
    fh : int, list or array-like, optional (default=None)
        Forecasting horizon: the steps ahead to predict.
    X : pd.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        Optional 2-d dataframe of exogenous variables.

    Returns
    -------
    score : float
        sMAPE loss of ``self.predict(fh=fh, X=X)`` with respect to ``y_test``.

    See Also
    --------
    :meth:`sktime.performance_metrics.forecasting.smape_loss`
    """
    # The metric is imported inside the method rather than at module level.
    from sktime.performance_metrics.forecasting import smape_loss

    # No input validation here: predict and the loss function perform it.
    y_pred = self.predict(fh=fh, X=X)
    return smape_loss(y_test, y_pred)
def regression_performance(predicted, actual):
    """Compute a dictionary of regression error metrics.

    Parameters
    ----------
    predicted : array-like
        Predicted values (must support element-wise arithmetic with
        ``actual``).
    actual : array-like
        Observed values.

    Returns
    -------
    dict
        Keys, in order: ``mape``, ``wmape``, ``me``, ``mae``, ``mpe``,
        ``rmse``, ``corr``, ``r2``, ``smape_loss``. ``mape``, ``wmape``,
        ``mpe`` and ``smape_loss`` are scaled by 100.
    """
    residual = predicted - actual
    abs_residual = np.abs(residual)

    metrics = {}
    # Mean absolute percentage error.
    metrics["mape"] = np.mean(abs_residual / np.abs(actual)) * 100
    # Weighted MAPE: total absolute error over total absolute actuals.
    metrics["wmape"] = sum(abs_residual) / sum(np.abs(actual)) * 100
    # Mean error (signed bias).
    metrics["me"] = np.mean(residual)
    # Mean absolute error.
    metrics["mae"] = np.mean(abs_residual)
    # Mean percentage error (signed).
    metrics["mpe"] = np.mean(residual / actual) * 100
    # Root mean squared error.
    metrics["rmse"] = np.mean(residual ** 2) ** 0.5
    # Correlation coefficient between predictions and actuals.
    metrics["corr"] = np.corrcoef(predicted, actual)[0, 1]
    # Coefficient of determination.
    metrics["r2"] = r2_score(actual, predicted)
    # sMAPE is computed on pandas Series built from the inputs.
    metrics["smape_loss"] = (
        smape_loss(pd.Series(actual), pd.Series(predicted)) * 100
    )
    return metrics
def test_smape_loss(test_y):
    """Check sMAPE values for scaled forecasts and symmetry in its arguments."""
    actual = test_y

    # A forecast identical to the target has zero loss in either order.
    identical = actual
    assert smape_loss(actual, identical) == pytest.approx(0.0)
    assert smape_loss(identical, actual) == pytest.approx(0.0)

    # (scale factor applied to the target, expected sMAPE)
    scaled_cases = (
        (1.1, 0.095238095238),
        (1.000001, 0.000001),
        (2.0, 0.6666666),
    )
    for factor, expected in scaled_cases:
        scaled = actual * factor
        assert smape_loss(actual, scaled) == pytest.approx(expected)
        assert smape_loss(scaled, actual) == pytest.approx(expected)

    # For a large scale factor only symmetry is checked, not the value.
    scaled = actual * 100
    assert smape_loss(actual, scaled) == smape_loss(scaled, actual)
def test_dirrec_against_recursive_accumulated_error():
    """DirRec reduction should score a lower sMAPE than recursive reduction.

    The expectation stated by the test is that the dirrec strategy
    accumulates less error over the horizon than the recursive strategy.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series, test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)
    estimator = LinearRegression()

    # Build both reducers first, then fit/predict them in the same order.
    strategies = ("recursive", "dirrec")
    forecasters = {
        strategy: make_reduction(
            estimator, scitype="tabular-regressor", strategy=strategy
        )
        for strategy in strategies
    }
    predictions = {
        strategy: forecasters[strategy].fit(y_train, fh=fh).predict(fh)
        for strategy in strategies
    }

    dirrec_loss = smape_loss(y_test, predictions["dirrec"])
    recursive_loss = smape_loss(y_test, predictions["recursive"])
    assert dirrec_loss < recursive_loss
def calculate_smape(df_, regressor, forecast_horizon, window_length):
    """Fit a recursive reduced-regression forecaster and return its sMAPE.

    The last column of ``df_`` is used as the target series; the final 12
    observations are held out as the test set.

    Parameters
    ----------
    df_ : pd.DataFrame
        Input data. It is not modified; missing values are forward-filled
        on a new object.
    regressor : object
        Identifier passed to ``select_regressor`` to obtain the regressor.
    forecast_horizon : object
        Currently unused; kept for interface compatibility. The horizon is
        always the length of the 12-observation test set.
    window_length : int
        Window length handed to ``ReducedRegressionForecaster``.

    Returns
    -------
    float
        sMAPE of the forecasts on the held-out observations.
    """
    # DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated
    # in recent pandas; it returns a new object so no explicit copy is needed.
    df = df_.ffill()
    y = df.iloc[:, -1].reset_index(drop=True)
    y_train, y_test = temporal_train_test_split(y, test_size=12)

    # Relative horizon 1..len(y_test).
    fh = np.arange(y_test.shape[0]) + 1

    forecaster = ReducedRegressionForecaster(
        regressor=select_regressor(regressor),
        window_length=window_length,
        strategy='recursive',
    )
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.predict(fh)

    # Ground truth first, forecast second.
    return smape_loss(y_test, y_pred)
def test_score(Forecaster, fh):
    """`score` must equal the sMAPE computed by hand from `predict`."""

    def _fitted_forecaster():
        # Fresh instance fitted on the module-level training series.
        forecaster = _construct_instance(Forecaster)
        forecaster.fit(y_train, fh=fh)
        return forecaster

    # Expected value: predict, then evaluate against the matching test rows.
    y_pred = _fitted_forecaster().predict()
    positions = check_fh(fh).to_indexer()  # zero-based positions of fh steps
    expected = smape_loss(y_pred, y_test.iloc[positions])

    # Actual value: a second, independently fitted instance calling score.
    actual = _fitted_forecaster().score(y_test.iloc[positions], fh=fh)

    assert actual == expected
def load_forecast(
        data,
        model_path='Models/6689489_NaiveForecaster_2017-01_2019-04_5.model'):
    """
    Load a saved forecasting model, print its sMAPE and plot the forecast.

    The model file name is expected to follow the pattern
    ``<customer_id>_<model_type>_<start>_<end>_<test_size>.model``; the
    customer id, interval and test size are parsed from it.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp
    model_path: .model file
        path to previously saved model

    Returns
    -------
    plot: matplotlib figure
        plot of train, test and predicted values. The sMAPE loss is
        printed, not returned.
    """
    import os

    # Parse the file name only: splitting the full path breaks as soon as a
    # directory name contains an underscore.
    fields = os.path.basename(model_path).split('_')
    customer_id = int(fields[0])
    start, end = fields[-3], fields[-2]
    test_size = int(fields[-1].split('.')[0])  # strip the file extension

    y_train, y_test = temporal_train_test_split(
        prepare_data(data, customer_id, start=start, end=end),
        test_size=test_size)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    # NOTE(review): if `load` is pickle/joblib based, only load trusted files.
    f = load(model_path)
    y_pred = f.predict(fh)

    # Ground truth first, forecast second.
    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
def calculate_smape(df_, regressor, forecast_horizon, window_length):
    """Forecast every column's last 12 observations and report sMAPE per column.

    Each column of ``df_`` is split into train and a 12-observation test
    set; one forecaster is re-fitted per column. The ``'total'`` column's
    forecast and actuals are plotted and shown through Streamlit.

    Parameters
    ----------
    df_ : pd.DataFrame
        One series per column; a ``'total'`` column is required for the plot.
    regressor : str
        Name passed to ``select_regressor``. ``'Naive'``, ``'Theta'``,
        ``'Exp_Smoothing'`` and ``'TBATS'`` are used directly as
        forecasters; anything else is wrapped in a recursive
        ``ReducedRegressionForecaster``.
    forecast_horizon : object
        Not used by this function.
    window_length : int
        Window length for the reduced-regression wrapper.

    Returns
    -------
    pd.DataFrame
        sMAPE in percent (rounded to one decimal), indexed by column name.
    """
    df = df_.copy()

    # These names resolve to ready-made forecasters; the rest are regressors.
    native_forecasters = ('Naive', 'Theta', 'Exp_Smoothing', 'TBATS')
    is_native = regressor in native_forecasters
    model = select_regressor(regressor)
    if is_native:
        forecaster = model
    else:
        forecaster = ReducedRegressionForecaster(
            regressor=model,
            window_length=window_length,
            strategy='recursive',
        )

    predictions = []
    holdouts = []
    for name in df.columns:
        series = df.iloc[:, df.columns.get_loc(name)].reset_index(drop=True)
        train, holdout = temporal_train_test_split(series, test_size=12)
        fh = np.arange(holdout.shape[0]) + 1
        forecaster.fit(train, fh=fh)
        predictions.append(forecaster.predict(fh))
        holdouts.append(holdout)

    forecast_df = pd.concat(predictions, axis=1)
    actual_df = pd.concat(holdouts, axis=1)
    forecast_df.columns = actual_df.columns.tolist()

    fig, ax = plt.subplots(
        1, 1, figsize=(15, 6), facecolor='w', edgecolor='k'
    )
    fig.subplots_adjust(hspace=.5, wspace=.001)

    scores = [
        round(100 * smape_loss(forecast_df[column], actual_df[column]), 1)
        for column in actual_df
    ]

    ax.plot(forecast_df['total'], 'o-', color='orange', label="predicted")
    ax.plot(actual_df['total'], 'o-', color='blue', label="actual")
    ax.set_title('Testing the performance: last 12 month actual vs forecast')
    ax.legend()
    st.pyplot(fig)

    return pd.DataFrame(scores).set_index(actual_df.columns)
def forecast_performance(forecast, actual):
    """Compute a dictionary of forecast error metrics.

    Parameters
    ----------
    forecast : array-like
        Forecast values (must support element-wise arithmetic with
        ``actual``).
    actual : array-like
        Observed values.

    Returns
    -------
    dict
        Keys, in order: ``mape``, ``wmape``, ``me``, ``mae``, ``mpe``,
        ``rmse``, ``corr``, ``smape_loss``. ``mape``, ``wmape``, ``mpe``
        and ``smape_loss`` are scaled by 100.
    """
    residual = forecast - actual
    abs_residual = np.abs(residual)

    metrics = {}
    # Mean absolute percentage error.
    metrics["mape"] = np.mean(abs_residual / np.abs(actual)) * 100
    # Weighted MAPE: total absolute error over total absolute actuals.
    metrics["wmape"] = sum(abs_residual) / sum(np.abs(actual)) * 100
    # Mean error (signed bias).
    metrics["me"] = np.mean(residual)
    # Mean absolute error.
    metrics["mae"] = np.mean(abs_residual)
    # Mean percentage error (signed).
    metrics["mpe"] = np.mean(residual / actual) * 100
    # Root mean squared error.
    metrics["rmse"] = np.mean(residual ** 2) ** 0.5
    # Correlation coefficient between forecast and actuals.
    metrics["corr"] = np.corrcoef(forecast, actual)[0, 1]
    # sMAPE is computed on pandas Series built from the inputs.
    metrics["smape_loss"] = (
        smape_loss(pd.Series(actual), pd.Series(forecast)) * 100
    )
    return metrics
def main():
    """Compare several forecasters on the airline dataset.

    Fits a seasonal-naive, AutoARIMA, exponential-smoothing, Theta and an
    ensemble forecaster on all but the last 36 months, plots every forecast
    against the held-out data and prints each model's sMAPE in percent.
    """
    # Univariate, monthly records from 1949 to 60 (144 records).
    df = datasets.load_airline()
    # Hold out the last 36 months for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Forecast horizon: one step per test observation.
    fh = np.arange(1, len(y_test) + 1)

    def exp_smoothing():
        # `damped` must be a real boolean: a string flag is always truthy,
        # so e.g. 'False' would silently still enable damping.
        return ExponentialSmoothing(trend='add',
                                    damped=True,
                                    seasonal='multiplicative',
                                    sp=12)

    # (name used in the printed report, plot label, forecaster).
    # sp=12 months, i.e. a yearly season.
    models = [
        ('NaiveForecaster', 'Naive Forecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing', exp_smoothing()),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing', exp_smoothing()),
             ('Theta', ThetaForecaster(sp=12)),
         ])),
    ]

    predictions = []
    for _, _, forecaster in models:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_ys(y_train,
            y_test,
            *predictions,
            labels=['Train', 'Test'] + [label for _, label, _ in models])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (name, _, _), y_pred in zip(models, predictions):
        # Scale to percent before rounding: multiplying an already rounded
        # float by 100 yields artefacts such as 14.499999999999998.
        print(f'SMAPE Error for {name} is:',
              round(100 * smape_loss(y_test, y_pred), 1), '%')
# Notebook-style bare expression: displays the forecasting horizon.
fh

# Exponential smoothing with additive trend and additive seasonality,
# seasonal period 12.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)
ets_frcstr.fit(y_train)
y_pred = ets_frcstr.predict(fh)
# Plot training data, test data and forecast.
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])

# Inspect the fitted parameters and the constructor parameters.
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()

# sMAPE of the forecast against the test data.
smape_loss(y_test, y_pred)

# AutoETS with default settings.
auto_ets_frr = AutoETS()
# NOTE(review): this fits on y_pred (the previous forecast), not y_train —
# confirm that this is intended.
auto_ets_frr.fit(y_pred)
auto_ets_frr.summary()

# NOTE(review): the AutoARIMA instance is immediately overwritten by ARIMA().
arima_frr = AutoARIMA()
arima_frr = ARIMA()

# ARIMA with order (1, 1, 0) and seasonal order (0, 1, 0, 12).
forecaster = ARIMA( order=(1, 1, 0), seasonal_order=(0, 1, 0, 12), suppress_warnings=True )
# Temporal cross-validation: the initial window is half of the training data.
cv = SlidingWindowSplitter(initial_window=int(len(train) * 0.5))
# Grid search over forecaster_param_grid using the sliding-window splitter.
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=forecaster_param_grid)
gscv.fit(train)
y_pred = gscv.predict(fh)


# In[43]:


# Notebook-style bare expression: displays the best parameters found.
gscv.best_params_


# In[42]:


# Plot train, test and forecast, then evaluate sMAPE on the test data.
plot_ys(train, test, y_pred, labels=["y_train", "y_test", "y_pred"])
smape_loss(test, y_pred)


# ### Tune Forecaster & Reduced Regressor

# In[51]:


from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


# In[52]:


# Candidate values for the forecaster (window length) and for the regressor
# (number of estimators).
forecaster_param_grid = {"window_length": [5, 10, 15]}
regressor_param_grid = {"n_estimators": [100, 200, 300]}


# In[53]:
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Build a forecasting model for one customer and time interval, save the
    model, print its sMAPE and plot the forecast.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp
    customer_id: int
    start: string
        start year and month in '2020-01' format
    end: string
        end year and month in '2020-01' format
        *** this month will not be included ***
    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster',
        'ThetaForecaster', 'KNeighborsRegressor', 'ExponentialSmoothing',
        'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']
    test_size_month:
        number of months excluded from the end of the interval and used
        as the test dataset
    model_storage_path: string
        folder in which the saved model is stored; an empty string means
        the current working directory

    Returns
    -------
    plot: matplotlib figure
        plot of train, test and predicted values. The sMAPE loss is
        printed, not returned.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    import os

    # Validate the model type before doing any data preparation.
    forecaster = _build_forecaster(model_type)

    y_train, y_test = temporal_train_test_split(
        prepare_data(data, customer_id, start=start, end=end),
        test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    try:
        forecaster.fit(y_train)
    except Exception:
        # Fallback: retry on the series shifted by +1 (presumably for models
        # that reject zeros, e.g. multiplicative seasonality).
        # NOTE(review): predictions from the shifted fit are not shifted
        # back before being compared with y_test — confirm this is intended.
        forecaster.fit(y_train + 1)
    y_pred = forecaster.predict(fh)

    # os.path.join keeps the file relative when model_storage_path is empty;
    # plain '/' concatenation would target the filesystem root.
    filename = (f'{customer_id}_{model_type}_{start}_{end}'
                f'_{test_size_month}.model')
    dump(forecaster, os.path.join(model_storage_path, filename))

    # Ground truth first, forecast second.
    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot


def _build_forecaster(model_type):
    """Return a new, unfitted forecaster for the given model type name."""
    # Builders are lazy so only the requested model class is referenced
    # and instantiated.
    builders = {
        'NaiveForecaster':
            lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
            lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
            lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
            lambda: ReducedRegressionForecaster(
                regressor=KNeighborsRegressor(n_neighbors=1),
                window_length=12,
                strategy="recursive"),
        'ExponentialSmoothing':
            lambda: ExponentialSmoothing(
                trend="add", seasonal="multiplicative", sp=12),
        'AutoETS':
            lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
            lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
            lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
            lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
            lambda: EnsembleForecaster([
                ("ses",
                 ExponentialSmoothing(seasonal="multiplicative", sp=12)),
                ("holt",
                 ExponentialSmoothing(trend="add",
                                      damped_trend=False,
                                      seasonal="multiplicative",
                                      sp=12)),
                ("damped",
                 ExponentialSmoothing(trend="add",
                                      damped_trend=True,
                                      seasonal="multiplicative",
                                      sp=12)),
            ]),
    }
    if model_type not in builders:
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            f"expected one of {sorted(builders)}")
    return builders[model_type]()
def genforecast(data):
    """Evaluate four forecasters on ``data`` and forecast the next 5 steps.

    The series is split with ``temporal_train_test_split`` (default test
    size). For each of a drift-naive, a linear-trend, an exponential
    smoothing ensemble and a grid-searched KNN reduction forecaster, the
    model is scored on the test split, re-fitted on the full series and
    used to predict 5 steps ahead.

    Parameters
    ----------
    data : pd.Series
        Series to forecast (presumably daily, per the "5 day" horizon —
        TODO confirm).

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)``, where ``min_*``/``max_*`` are the rounded extremes
        of the 5-step forecast, ``*_acc`` is the test sMAPE rounded to 4
        decimals and ``testct`` is the number of test observations.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)

    def evaluate(forecaster):
        return _evaluate_and_forecast(forecaster, data, y_train, y_test, fh)

    # Naive forecaster with drift.
    y_pred_naive, naive_acc, min_naive, max_naive = evaluate(
        NaiveForecaster(strategy="drift"))

    # Linear (degree-1) trend.
    y_pred_poly, poly_acc, min_poly, max_poly = evaluate(
        PolynomialTrendForecaster(degree=1))

    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(math.floor(len(y_test) / 4), 12))
    y_pred_ensem, ensem_acc, min_ensem, max_ensem = evaluate(
        EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative",
                                         sp=spval)),
            ("holt",
             ExponentialSmoothing(trend="add",
                                  damped=False,
                                  seasonal="multiplicative",
                                  sp=spval)),
            ("damped",
             ExponentialSmoothing(trend="add",
                                  damped=True,
                                  seasonal="multiplicative",
                                  sp=spval)),
        ]))

    # KNN reduction; the window length is tuned by temporal cross-validation
    # whose initial window is half of the training data.
    reducer = ReducedRegressionForecaster(
        regressor=KNeighborsRegressor(n_neighbors=1),
        window_length=15,
        strategy="recursive")
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(
        reducer, cv=cv, param_grid={"window_length": [5, 10, 15]})
    y_pred_redreg, redreg_acc, min_redreg, max_redreg = evaluate(gscv)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)


def _evaluate_and_forecast(forecaster, data, y_train, y_test, fh,
                           future_steps=5):
    """Score a forecaster on the test split, then refit on all data and
    return ``(y_pred, smape, future_min, future_max)``."""
    import numpy as np
    from sktime.performance_metrics.forecasting import smape_loss

    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    # Ground truth first, forecast second.
    accuracy = round(smape_loss(y_test, y_pred), 4)

    # Refit on the full series and predict `future_steps` steps ahead.
    forecaster.fit(data)
    future = forecaster.predict(np.arange(1, future_steps + 1))
    return y_pred, accuracy, round(min(future), 2), round(max(future), 2)
* 我们预测在同一季节观察到的最后一个值 ''') y_pred = np.repeat(y_train.iloc[-1], len(fh)) y_pred = pd.Series(y_pred, index=y_train.index[-1] + fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write(''' (2) 使用sktime ''') forecaster = NaiveForecaster(strategy="last") forecaster.fit(y_train) y_last = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_last, y_test):", smape_loss(y_last, y_test)) forecaster = NaiveForecaster(strategy="seasonal_last", sp=12) forecaster.fit(y_train) y_pred = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_last, y_test):", smape_loss(y_last, y_test)) st.write(''' ### 4 Forecasting with sktime ### 4.1 Reduction: from forecasting to regression 从预测到回归sktime为此方法提供了一个元估算器,即: * 模块化并与scikit-learn兼容,因此我们可以轻松地应用任何scikit-learn回归器来解决我们的预测问题;