def test_avg_mean(forecasters):
    """Check that the default ensemble matches an equally weighted mean.

    One ensemble is built from the forecasters alone, the other with
    ``aggfunc="mean"`` and weights ``[1, 1]``. Both are fitted on the same
    series and horizon, and their predictions must be identical.
    """
    y = make_forecasting_problem()

    # Ensemble constructed with nothing but the list of forecasters.
    plain_ensemble = EnsembleForecaster(forecasters)
    plain_ensemble.fit(y, fh=[1, 2, 3])
    mean_pred = plain_ensemble.predict()

    # Ensemble with an explicit mean aggregation and equal weights.
    weighted_ensemble = EnsembleForecaster(
        forecasters,
        aggfunc="mean",
        weights=[1, 1],
    )
    weighted_ensemble.fit(y, fh=[1, 2, 3])
    avg_pred = weighted_ensemble.predict()

    pd.testing.assert_series_equal(mean_pred, avg_pred)
def test_aggregation_unweighted(forecasters, aggfunc):
    """Compare the ensemble's unweighted aggregate with a manual computation.

    The ensemble prediction is checked against the result of applying the
    ``"unweighted"`` entry of ``VALID_AGG_FUNCS[aggfunc]`` row-wise to the
    predictions of the individually fitted forecasters.
    """
    y = make_forecasting_problem()

    ensemble = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    row_aggregator = VALID_AGG_FUNCS[aggfunc]["unweighted"]

    # Fit every member on its own and collect its forecast.
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=[1, 2, 3]))

    # One column per forecaster, one row per forecast step.
    stacked = pd.DataFrame(member_preds).T
    expected_pred = stacked.apply(func=row_aggregator, axis=1)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def run_sktimes(dept_id, store_id):
    """Forecast 28 steps for one department/store pair with an sktime ensemble.

    Returns a 1-D array holding ``dept_id`` and ``store_id`` followed by the
    28 forecast values.
    """
    # Series wrapper for this department/store; only its ``y`` is used here.
    series = CreateTimeSeries(dept_id, store_id)

    members = [
        ('naive_ses', NaiveForecaster(sp=28, strategy="seasonal_last")),
        ('naive', NaiveForecaster(strategy="last")),
        ('theta_ses', ThetaForecaster(sp=28)),
        ('theta', ThetaForecaster()),
        ("exp_ses", ExponentialSmoothing(seasonal="additive", sp=28)),
        (
            "exp_damped",
            ExponentialSmoothing(
                trend='additive', damped=True, seasonal="additive", sp=28
            ),
        ),
    ]
    ensemble = EnsembleForecaster(members)

    # The series is shifted up by one for fitting and the forecast shifted
    # back down below.
    # NOTE(review): presumably to avoid zeros in the data; confirm.
    ensemble.fit(series.y + 1)
    horizon = np.arange(1, 29)
    shifted_forecast = ensemble.predict(horizon)

    ids = np.array([dept_id, store_id])
    return np.append(ids, shifted_forecast - 1)
def construct_M4_forecasters(sp, fh):
    """Build the dictionary of named forecasters.

    Parameters
    ----------
    sp : int
        Seasonal periodicity. When it is greater than 1, the seasonal model
        and ``sp`` are forwarded to the deseasonalising wrappers; otherwise
        they are called without extra keyword arguments.
    fh
        Not used inside this function.

    Returns
    -------
    dict
        Maps a forecaster name to a forecaster instance.
    """
    if sp > 1:
        kwargs = {"model": SEASONAL_MODEL, "sp": sp}
    else:
        kwargs = {}

    # Theta on Box-Cox transformed, conditionally deseasonalised data.
    theta_bc = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_R, **kwargs),
        BoxCoxTransformer(bounds=(0, 1)),
        ThetaForecaster(deseasonalise=False),
    )

    # Disabled pipelines, kept for reference:
    #
    # MLP = make_pipeline(
    #     ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
    #                               **kwargs),
    #     Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
    #     RecursiveRegressionForecaster(
    #         regressor=MLPRegressor(hidden_layer_sizes=6,
    #                                activation="identity", solver="adam",
    #                                max_iter=100, learning_rate="adaptive",
    #                                learning_rate_init=0.001),
    #         window_length=3)
    # )
    # RNN = make_pipeline(
    #     ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
    #                               **kwargs),
    #     Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
    #     RecursiveTimeSeriesRegressionForecaster(
    #         regressor=SimpleRNNRegressor(nb_epochs=100),
    #         window_length=3)
    # )

    # ``ses``, ``holt`` and ``damped`` are defined outside this function.
    forecasters = {}
    forecasters["Naive"] = NaiveForecaster(strategy="last")
    forecasters["sNaive"] = NaiveForecaster(strategy="seasonal_last", sp=sp)
    forecasters["Naive2"] = deseasonalise(
        NaiveForecaster(strategy="last"), **kwargs
    )
    forecasters["SES"] = deseasonalise(ses, **kwargs)
    forecasters["Holt"] = deseasonalise(holt, **kwargs)
    forecasters["Damped"] = deseasonalise(damped, **kwargs)
    forecasters["Theta"] = deseasonalise(
        ThetaForecaster(deseasonalise=False), **kwargs
    )
    forecasters["ARIMA"] = AutoARIMA(
        suppress_warnings=True, error_action="ignore", sp=sp
    )
    forecasters["Com"] = deseasonalise(
        EnsembleForecaster([("ses", ses), ("holt", holt), ("damped", damped)]),
        **kwargs,
    )
    # "MLP" and "RNN" entries are disabled together with their pipelines.
    forecasters["260"] = theta_bc
    return forecasters
def test_invalid_aggfuncs(forecasters, aggfunc):
    """Check that predicting with an invalid ``aggfunc`` raises ValueError.

    The ensemble is constructed and fitted first; the error, whose message
    must match "not recognized", is expected from ``predict``.
    """
    y = make_forecasting_problem()

    ensemble = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    ensemble.fit(y, fh=[1, 2])

    expect_value_error = pytest.raises(ValueError, match=r"not recognized")
    with expect_value_error:
        ensemble.predict()
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Compare the ensemble's weighted aggregate with a manual computation.

    The expected series is obtained by calling the ``"weighted"`` entry of
    ``VALID_AGG_FUNCS[aggfunc]`` across the columns of the individual
    forecasters' predictions, using ``weights`` converted to an array.
    """
    y = make_forecasting_problem()

    ensemble = EnsembleForecaster(
        forecasters=forecasters,
        aggfunc=aggfunc,
        weights=weights,
    )
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    # Fit every member on its own and collect its forecast.
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=[1, 2, 3]))

    # One column per forecaster, one row per forecast step.
    stacked = pd.DataFrame(member_preds).T

    weighted_func = VALID_AGG_FUNCS[aggfunc]["weighted"]
    aggregated = weighted_func(stacked, axis=1, weights=np.array(weights))
    expected_pred = pd.Series(aggregated, index=stacked.index)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Compare the ensemble's weighted aggregate with a manual computation.

    ``np.average`` is used as the reference when ``aggfunc`` is ``"mean"``
    and ``gmean`` otherwise; the reference function is applied with
    ``weights`` down the rows of the stacked individual predictions.
    """
    y = make_forecasting_problem()

    ensemble = EnsembleForecaster(
        forecasters=forecasters, aggfunc=aggfunc, weights=weights
    )
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    # Fit every member on its own and collect its forecast.
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=[1, 2, 3]))

    # One row per forecaster here (the frame is not transposed).
    stacked = pd.DataFrame(member_preds)

    func = np.average if aggfunc == "mean" else gmean
    expected_pred = stacked.apply(func=func, axis=0, weights=weights)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def main():
    """Compare five forecasters on the airline dataset.

    Loads the airline series, holds out the last 36 observations, fits a
    seasonal naive forecaster, AutoARIMA, exponential smoothing, Theta and an
    ensemble of those four model types, plots all forecasts against the data
    and prints each model's sMAPE as a percentage.
    """
    # Univariate, monthly records from 1949 to 1960 (144 records).
    df = datasets.load_airline()
    # 36 months are kept aside for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Forecast horizon: relative steps, same length as y_test.
    fh = np.arange(1, len(y_test) + 1)

    # Strategy options: last, mean, seasonal_last. sp=12 months (yearly season).
    forecaster = NaiveForecaster(strategy='seasonal_last', sp=12)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    forecaster2 = AutoARIMA(sp=12, suppress_warnings=True, trace=1)
    forecaster2.fit(y_train)
    y_pred2 = forecaster2.predict(fh)

    # ``damped`` is a boolean flag; the string 'True' only worked because any
    # non-empty string is truthy.
    forecaster3 = ExponentialSmoothing(
        trend='add', damped=True, seasonal='multiplicative', sp=12
    )
    forecaster3.fit(y_train)
    y_pred3 = forecaster3.predict(fh)

    forecaster4 = ThetaForecaster(sp=12)
    forecaster4.fit(y_train)
    y_pred4 = forecaster4.predict(fh)

    forecaster5 = EnsembleForecaster([
        ('NaiveForecaster', NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
        (
            'Exp Smoothing',
            ExponentialSmoothing(
                trend='add', damped=True, seasonal='multiplicative', sp=12
            ),
        ),
        ('Theta', ThetaForecaster(sp=12)),
    ])
    forecaster5.fit(y_train)
    y_pred5 = forecaster5.predict(fh)

    plot_ys(
        y_train, y_test, y_pred, y_pred2, y_pred3, y_pred4, y_pred5,
        labels=[
            'Train', 'Test', 'Naive Forecaster', 'AutoARIMA',
            'Exp Smoothing', 'Theta', 'Ensemble',
        ],
    )
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    results = [
        ('NaiveForecaster', y_pred),
        ('AutoARIMA', y_pred2),
        ('Exp Smoothing', y_pred3),
        ('Theta', y_pred4),
        ('Ensemble', y_pred5),
    ]
    for label, prediction in results:
        # Scale first, then round: ``100 * round(x, 3)`` can print float
        # artifacts such as 14.499999999999998.
        percent = round(100 * smape_loss(y_test, prediction), 1)
        print(f'SMAPE Error for {label} is:', percent, '%')
def forecast(data, customer_id, start='2017-01', end='2019-04',
             model_type='NaiveForecaster', test_size_month=5,
             model_storage_path=''):
    """Build, save and plot a forecasting model for one customer.

    The customer's series for the selected interval is split into train and
    test parts, the requested model is fitted on the train part, the fitted
    model is saved, the sMAPE loss is printed and a plot is returned.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp
    customer_id: int
    start: string
        start year and month in '2020-01' format
    end: string
        end year and month in '2020-01' format
        *** this month will not be included ***
    model_type: string
        type of model to use in forecasting, one of:
        ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster',
        'KNeighborsRegressor', 'ExponentialSmoothing', 'AutoETS',
        'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']
    test_size_month: int
        number of months excluded from the end of the interval and used as
        the test dataset
    model_storage_path: string
        folder in which the fitted model is stored; an empty string means
        the current working directory

    Returns
    -------
    plot:
        the result of ``plot_series`` for the train, test and predicted
        values

    Raises
    ------
    ValueError
        if ``model_type`` is not one of the supported names
    """
    import os

    def _build_ensemble():
        return EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add", damped_trend=False,
                                     seasonal="multiplicative", sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add", damped_trend=True,
                                     seasonal="multiplicative", sp=12),
            ),
        ])

    # Factories are lazy so only the selected model is ever constructed.
    builders = {
        'NaiveForecaster': lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
            lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster': lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor': lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive",
        ),
        'ExponentialSmoothing': lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12),
        'AutoETS': lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA': lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS': lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS': lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster': _build_ensemble,
    }

    # Fail fast: previously an unknown name surfaced only later as an
    # UnboundLocalError on ``forecaster``.
    if model_type not in builders:
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            f"expected one of {sorted(builders)}"
        )

    series = prepare_data(data, customer_id, start=start, end=end)
    y_train, y_test = temporal_train_test_split(series,
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    forecaster = builders[model_type]()

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on a series shifted up by one.
        # NOTE(review): the forecast below is not shifted back down after
        # this retry; confirm whether that is intended.
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)

    file_name = (
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    )
    # os.path.join keeps an empty storage path relative instead of producing
    # a file directly under the filesystem root ('/...').
    dump(forecaster, os.path.join(model_storage_path, file_name))

    print('sMAPE Loss :', smape_loss(y_test, y_pred))

    plot = plot_series(y_train, y_test, y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
def _holdout_and_future(forecaster, data, y_train, y_test, fh, future_fh):
    """Score a forecaster on the hold-out split, then refit on all data.

    Returns a tuple ``(y_pred, acc, fut_min, fut_max)``: the hold-out
    prediction, its sMAPE rounded to 4 decimals, and the minimum and maximum
    of the forecast over ``future_fh`` (rounded to 2 decimals) after
    refitting on ``data``.
    """
    from sktime.performance_metrics.forecasting import smape_loss

    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    acc = round(smape_loss(y_test, y_pred), 4)

    # Full model development: refit on everything, forecast the future window.
    forecaster.fit(data)
    fut_pred = forecaster.predict(future_fh)
    return y_pred, acc, round(min(fut_pred), 2), round(max(fut_pred), 2)


def genforecast(data):
    """Evaluate four forecasters on ``data`` and forecast five steps ahead.

    The series is split with ``temporal_train_test_split``. A drift naive
    forecaster, a linear trend forecaster, an exponential-smoothing ensemble
    and a grid-searched k-nearest-neighbours reduction forecaster are each
    scored on the test part and then refitted on the full series to predict
    the next five steps.

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)``
    """
    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)
    futurewin = np.arange(1, 6)  # 5 steps into the future

    y_pred_naive, naive_acc, min_naive, max_naive = _holdout_and_future(
        NaiveForecaster(strategy="drift"),
        data, y_train, y_test, fh, futurewin,
    )

    y_pred_poly, poly_acc, min_poly, max_poly = _holdout_and_future(
        PolynomialTrendForecaster(degree=1),
        data, y_train, y_test, fh, futurewin,
    )

    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(len(y_test) // 4, 12))
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt", ExponentialSmoothing(trend="add", damped=False,
                                      seasonal="multiplicative", sp=spval)),
        ("damped", ExponentialSmoothing(trend="add", damped=True,
                                        seasonal="multiplicative", sp=spval)),
    ])
    y_pred_ensem, ensem_acc, min_ensem, max_ensem = _holdout_and_future(
        ensemble, data, y_train, y_test, fh, futurewin,
    )

    regressor = KNeighborsRegressor(n_neighbors=1)
    reduction = ReducedRegressionForecaster(
        regressor=regressor, window_length=15, strategy="recursive"
    )
    param_grid = {"window_length": [5, 10, 15]}
    # Fit on the initial window, then use temporal cross-validation to find
    # the optimal parameter.
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(reduction, cv=cv, param_grid=param_grid)
    y_pred_redreg, redreg_acc, min_redreg, max_redreg = _holdout_and_future(
        gscv, data, y_train, y_test, fh, futurewin,
    )

    return (
        min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc,
    )
sktime提供了用于组合模型构建的模块化API,以进行预测。 * Ensembling 像scikit-learn一样,sktime提供了一个元预测器来集成多种预测算法。 例如,我们可以如下组合指数平滑的不同变体: ''') from sktime.forecasting.compose import EnsembleForecaster forecaster = EnsembleForecaster([ ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)), ("holt", ExponentialSmoothing(trend="add", damped=False, seasonal="multiplicative", sp=12)), ("damped", ExponentialSmoothing(trend="add", damped=True, seasonal="multiplicative", sp=12)) ]) forecaster.fit(y_train) y_pred = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred)) st.write(''' * Tuning In the `ReducedRegressionForecaster`,