def test_weights_for_airline_averaging():
    """Check that the ensemble weights are uniform right after ``fit``.

    An ``OnlineEnsembleForecaster`` made of three exponential smoothing
    variants (all with multiplicative seasonality, ``sp=12``) is fitted on the
    training part of the airline series. Only ``fit`` is called, and the
    resulting ``weights`` attribute is compared against ``[1/3, 1/3, 1/3]``.
    """
    series = load_airline()
    y_train, _ = temporal_train_test_split(series)

    ses = ExponentialSmoothing(seasonal="multiplicative", sp=12)
    holt = ExponentialSmoothing(
        trend="add", damped_trend=False, seasonal="multiplicative", sp=12
    )
    damped = ExponentialSmoothing(
        trend="add", damped_trend=True, seasonal="multiplicative", sp=12
    )
    members = [("ses", ses), ("holt", holt), ("damped_trend", damped)]

    ensemble = OnlineEnsembleForecaster(members)
    ensemble.fit(y_train)

    uniform = np.array([1 / 3] * 3)
    np.testing.assert_allclose(ensemble.weights, uniform, rtol=1e-8)
def test_FittedParamExtractor(param_names):
    """Check output shape and one spot value of ``FittedParamExtractor``.

    The extractor wraps an ``ExponentialSmoothing`` forecaster and is applied
    to the module-level ``X_train``. The result must have one row per row of
    ``X_train`` and one column per validated parameter name. The value at
    position ``[47, 0]`` must equal the parameter obtained by fitting the
    forecaster directly on the series stored at ``X_train.iloc[47, 0]``.
    """
    base_forecaster = ExponentialSmoothing()
    extractor = FittedParamExtractor(
        forecaster=base_forecaster, param_names=param_names
    )
    Xt = extractor.fit_transform(X_train)

    n_rows = X_train.shape[0]
    n_cols = len(extractor._check_param_names(param_names))
    assert Xt.shape == (n_rows, n_cols)

    # Spot check: refit on a single series and compare with the extracted value.
    row = 47
    base_forecaster.fit(X_train.iloc[row, 0])
    expected_value = base_forecaster.get_fitted_params()[param_names]
    assert Xt.iloc[row, 0] == expected_value
def train_model_expSmooting(y, x, output: bool = True) -> ExponentialSmoothing:
    """Fit an exponential smoothing model and return it.

    Parameters
    ----------
    y : array-like
        Target values. The first element is dropped before fitting.
    x : sliceable
        Values passed as the second argument of ``fit``. The last element is
        dropped, so ``x[i]`` ends up aligned with ``y[i + 1]``.
    output : bool, default=True
        When true, log a start message and the elapsed time.

    Returns
    -------
    ExponentialSmoothing
        The fitted model (``sp=24``, ``seasonal='mul'``).
    """
    if output:
        logger.info("Training Exponential Smoothing model...")
        timer = Timer()

    shifted_target = pd.Series(np.delete(y, 0))
    truncated_inputs = pd.DataFrame(x[:-1])

    model = ExponentialSmoothing(sp=24, seasonal='mul')
    model.fit(shifted_target, truncated_inputs)

    if output:
        logger.info(f'Done in {timer}')
    return model
def run_sktimes(dept_id, store_id):
    """Forecast 28 steps ahead for one department/store with an sktime ensemble.

    The series comes from ``CreateTimeSeries(dept_id, store_id).y``. It is
    shifted by +1 before fitting and the forecast is shifted back by -1
    (presumably to keep the fitted values strictly positive - TODO confirm).

    Returns
    -------
    numpy.ndarray
        ``[dept_id, store_id]`` followed by the 28 forecast values.
    """
    series = CreateTimeSeries(dept_id, store_id)

    # Six members: naive, theta and exponential smoothing, each with and
    # without a seasonal period of 28.
    members = [
        ('naive_ses', NaiveForecaster(sp=28, strategy="seasonal_last")),
        ('naive', NaiveForecaster(strategy="last")),
        ('theta_ses', ThetaForecaster(sp=28)),
        ('theta', ThetaForecaster()),
        ("exp_ses", ExponentialSmoothing(seasonal="additive", sp=28)),
        (
            "exp_damped",
            ExponentialSmoothing(
                trend='additive', damped=True, seasonal="additive", sp=28
            ),
        ),
    ]
    ensemble = EnsembleForecaster(members)
    ensemble.fit(series.y + 1)

    horizon = np.arange(1, 29)
    forecast_values = ensemble.predict(horizon) - 1

    ids = np.array([dept_id, store_id])
    return np.append(ids, forecast_values)
def get_test_params(cls):
    """Return testing parameter settings for the estimator.

    Returns
    -------
    params : dict
        Keyword arguments such that ``cls(**params)`` builds a valid test
        instance. Here a single entry, ``"forecaster"``, holding a default
        ``ExponentialSmoothing`` instance.
    """
    # Imported lazily, inside the method, exactly as the original did.
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing

    default_forecaster = ExponentialSmoothing()
    return {"forecaster": default_forecaster}
def test_exponential_smoothing():
    """Regression test for sktime issue 1876.

    https://github.com/alan-turing-institute/sktime/issues/1876#issue-1103752402.

    The airline series is re-indexed with a 10-minute ``PeriodIndex`` and an
    exponential smoothing model is fitted with a six-step horizon. The
    predicted index must continue the training index at the same frequency.
    """
    y = load_airline()

    # Replace the index by 10-minute steps covering 2019-01-01.
    freq = "10Min"
    first_stamp = pd.to_datetime("2019-01-01 00:00")
    last_stamp = pd.to_datetime("2019-01-01 23:55")
    stamps = pd.date_range(first_stamp, last_stamp, freq=freq)

    # A PeriodIndex is the case the original comment marks as "does not work".
    y.index = stamps.to_period()

    model = ExponentialSmoothing(trend="add", seasonal="multiplicative", sp=12)
    model.fit(y, fh=list(range(1, 7)))
    predicted_index = model.predict().index

    expected_index = pd.period_range("2019-01-02 00:00", periods=6, freq=freq)
    pd.testing.assert_index_equal(predicted_index, expected_index)
def select_regressor(selection):
    """Return a newly constructed estimator for the given short name.

    Only the requested estimator is instantiated; the lookup table stores
    factories rather than ready-made instances, so a call does not pay for
    building the ten estimators it does not return.

    Parameters
    ----------
    selection : str
        One of ``'LR'``, ``'KNN'``, ``'RF'``, ``'GB'``, ``'XGBoost'``,
        ``'SVM'``, ``'Extra Trees'``, ``'Naive'``, ``'Theta'``,
        ``'Exp_Smoothing'`` or ``'TBATS'``.

    Returns
    -------
    object
        A fresh, unfitted estimator configured as listed below.

    Raises
    ------
    KeyError
        If ``selection`` is not a known name.
    """
    factories = {
        'LR': LinearRegression,
        'KNN': KNeighborsRegressor,
        'RF': RandomForestRegressor,
        'GB': GradientBoostingRegressor,
        'XGBoost': lambda: XGBRegressor(verbosity=0),
        'SVM': LinearSVR,
        'Extra Trees': ExtraTreesRegressor,
        'Naive': lambda: NaiveForecaster(strategy="last", sp=12),
        'Theta': lambda: ThetaForecaster(sp=12),
        'Exp_Smoothing': lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12
        ),
        'TBATS': lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
    }
    return factories[selection]()
def get_test_params(cls, parameter_set="default"):
    """Return testing parameter settings for the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the set of test parameters to return, for use in tests.
        This implementation returns the same parameters for every value.

    Returns
    -------
    params : dict
        Keyword arguments such that ``cls(**params)`` builds a valid test
        instance. Here a single entry, ``"forecaster"``, holding a default
        ``ExponentialSmoothing`` instance.
    """
    # Imported lazily, inside the method, exactly as the original did.
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing

    default_forecaster = ExponentialSmoothing()
    return {"forecaster": default_forecaster}
# Exploratory cell: inspect the series `y`, hold out a test set, fit an
# additive exponential smoothing model and evaluate its forecast.

# Bare expressions below only display a value when run interactively.
type(y)
plot_series(y)
y.index

# Keep the last 24 observations as the test set.
y_train, y_test = temporal_train_test_split(y, test_size = 24)
plot_series(y_train, y_test)

# Absolute forecasting horizon built from the test-set index.
fh = ForecastingHorizon(y_test.index, is_relative=False)
fh

# Additive trend and additive seasonality with a seasonal period of 12.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)
ets_frcstr.fit(y_train)
y_pred = ets_frcstr.predict(fh)
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])

# Inspect the fitted parameters and the constructor parameters.
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()

# Symmetric MAPE of the forecast against the held-out data.
smape_loss(y_test, y_pred)

auto_ets_frr = AutoETS()
# NOTE(review): this fits AutoETS on the forecast `y_pred`, not on `y_train`
# - confirm that is intended.
auto_ets_frr.fit(y_pred)
# Plot train, test and predictions; the title shows the current `strategy`
# and the sMAPE of the predictions rounded to 4 decimals.
plot_ys(train, test, y_pred, labels=['train', 'test', 'preds'])
plt.title(
    f'strategy : {strategy} - smape_loss : {round(smape_loss(test,y_pred),4)}'
)

# ## Tuning

# ### Tune Forecaster

# In[44]:

from sktime.forecasting.model_selection import SlidingWindowSplitter, ForecastingGridSearchCV

# In[33]:

# Base forecaster whose parameters are searched over below.
forecaster = ExponentialSmoothing()

# In[39]:

# Grid: additive/multiplicative trend and seasonality, seasonal period fixed at 12.
forecaster_param_grid = {
    'trend': ['add', 'mul'],
    'seasonal': ['add', 'mul'],
    'sp': [12]
}

# In[40]:

# Sliding-window splitter whose initial window is half of the training series.
cv = SlidingWindowSplitter(initial_window=int(len(train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster,
                               cv=cv,
                               param_grid=forecaster_param_grid)
def _build_forecaster(model_type):
    """Create the unfitted forecaster registered under ``model_type``.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    factories = {
        'NaiveForecaster': lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster': lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster': lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor': lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive",
        ),
        'ExponentialSmoothing': lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12
        ),
        'AutoETS': lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA': lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS': lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS': lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster': lambda: EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ]),
    }
    if model_type not in factories:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {sorted(factories)}"
        )
    return factories[model_type]()


def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """Fit a forecaster for one customer, save it, report sMAPE and plot.

    Parameters
    ----------
    data : pandas.DataFrame
        Main dataset with customer_id, product_id and Timestamp.
    customer_id : int
        Customer whose series is forecast.
    start : str
        Start year and month in ``'2020-01'`` format.
    end : str
        End year and month in ``'2020-01'`` format. This month is not
        included.
    model_type : str
        One of ``'NaiveForecaster'``, ``'PolynomialTrendForecaster'``,
        ``'ThetaForecaster'``, ``'KNeighborsRegressor'``,
        ``'ExponentialSmoothing'``, ``'AutoETS'``, ``'AutoARIMA'``,
        ``'TBATS'``, ``'BATS'``, ``'EnsembleForecaster'``.
    test_size_month : int
        Number of months held out from the end of the interval as test data.
    model_storage_path : str
        Folder in which the fitted model is stored.

    Returns
    -------
    object
        Whatever ``plot_series`` returns for the train, test and predicted
        series. The sMAPE loss is printed, not returned.

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported name.
    """
    series = prepare_data(data, customer_id, start=start, end=end)
    y_train, y_test = temporal_train_test_split(series,
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    forecaster = _build_forecaster(model_type)

    # Some models fail to fit on the raw series (presumably when it contains
    # zeros and the model is multiplicative - TODO confirm). Retry on the
    # series shifted by +1 and remember the shift so it can be undone.
    shift = 0
    try:
        forecaster.fit(y_train)
    except Exception:
        shift = 1
        forecaster.fit(y_train + shift)

    y_pred = forecaster.predict(fh)
    if shift:
        # Bring the predictions back to the scale of y_test.
        y_pred = y_pred - shift

    # NOTE: when the fallback was used, the stored model was fitted on the
    # shifted series, so its raw predictions are offset by +1.
    dump(
        forecaster,
        f'{model_storage_path}/{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    )

    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
def genforecast(data):
    """Evaluate four forecasters on ``data`` and forecast five steps ahead.

    For each of a drift-naive forecaster, a linear trend forecaster, an
    exponential smoothing ensemble and a grid-searched KNN reduction, the
    model is fitted on the training split, scored with sMAPE on the test
    split, refitted on all of ``data`` and used to predict the next five
    steps.

    Parameters
    ----------
    data : series accepted by ``temporal_train_test_split``
        The full time series.

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)``. Each ``min_*``/``max_*`` is the smallest/largest of the
        five future predictions rounded to 2 decimals, each ``*_acc`` is the
        test sMAPE rounded to 4 decimals, and ``testct`` is ``len(y_test)``.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.performance_metrics.forecasting import smape_loss

    y_train, y_test = temporal_train_test_split(data)
    testct = len(y_test)
    fh = np.arange(1, testct + 1)
    futurewin = np.arange(1, 6)  # 5 day in future prediction

    def evaluate(model):
        """Score ``model`` on the test split, then forecast from the full data."""
        model.fit(y_train)
        y_pred = model.predict(fh)
        acc = round(smape_loss(y_test, y_pred), 4)
        # full model dev and forecast next 5 days
        model.fit(data)
        fut_pred = model.predict(futurewin)
        return y_pred, acc, round(min(fut_pred), 2), round(max(fut_pred), 2)

    y_pred_naive, naive_acc, min_naive, max_naive = evaluate(
        NaiveForecaster(strategy="drift"))

    y_pred_poly, poly_acc, min_poly, max_poly = evaluate(
        PolynomialTrendForecaster(degree=1))

    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(math.floor(len(y_test) / 4), 12))
    # NOTE(review): newer sktime releases appear to spell the `damped`
    # keyword `damped_trend` - confirm against the installed version.
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval))
    ])
    y_pred_ensem, ensem_acc, min_ensem, max_ensem = evaluate(ensemble)

    regressor = KNeighborsRegressor(n_neighbors=1)
    reduction = ReducedRegressionForecaster(regressor=regressor,
                                            window_length=15,
                                            strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    # we fit the forecaster on the initial window, and then use temporal
    # cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(reduction, cv=cv, param_grid=param_grid)
    y_pred_redreg, redreg_acc, min_redreg, max_redreg = evaluate(gscv)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)
"steps": STEPS }, ForecastingPipeline: { "steps": STEPS }, EnsembleForecaster: { "forecasters": FORECASTERS }, StackingForecaster: { "forecasters": FORECASTERS }, AutoEnsembleForecaster: { "forecasters": FORECASTERS }, Detrender: { "forecaster": ExponentialSmoothing() }, ForecastingGridSearchCV: { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), "param_grid": { "window_length": [2, 5] }, "scoring": MeanAbsolutePercentageError(symmetric=True), }, ForecastingRandomizedSearchCV: { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), "param_distributions": { "window_length": [2, 5] },
import pandas as pd
import pytest

from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.compose._ensemble import VALID_AGG_FUNCS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()), ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Check the default ensemble against an explicit equal-weight mean.

    An ``EnsembleForecaster`` built with default settings and one built with
    ``aggfunc="mean"`` and ``weights=[1, 1]`` are fitted on the same series
    and must produce identical predictions.
    """
    y = make_forecasting_problem()

    def fit_predict(ensemble):
        # Same three-step horizon for both ensembles.
        ensemble.fit(y, fh=[1, 2, 3])
        return ensemble.predict()

    mean_pred = fit_predict(EnsembleForecaster(forecasters))
    avg_pred = fit_predict(
        EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    )

    pd.testing.assert_series_equal(mean_pred, avg_pred)
[0 1 2 3 4 5 6 7 8 9] [10] ''') st.write(''' ### 4.2 Statistical forecasters sktime基于statsmodels中的实现,具有多种统计预测算法。 例如,要将指数平滑与可加趋势成分和可乘季节性一起使用,我们可以编写以下内容。注意,由于这是每月数据,所以季节性周期(sp)或每年的周期数为12。 ''') from sktime.forecasting.exp_smoothing import ExponentialSmoothing forecaster = ExponentialSmoothing(trend="add", seasonal="multiplicative", sp=12) forecaster.fit(y_train) y_pred = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred)) st.write(''' 另一个常见模型是ARIMA模型。 在sktime中,我们连接pmdarima,这是一个用于自动选择最佳ARIMA模型的软件包。 这是因为搜索了许多可能的模型参数,因此可能需要更长的时间。 ''') from sktime.forecasting.arima import AutoARIMA
def main():
    """Compare five forecasters on the airline data, plot and print sMAPE.

    The last 36 months are held out as the test set. A seasonal naive
    forecaster, AutoARIMA, damped exponential smoothing, Theta and an
    ensemble of the four are fitted on the training data, plotted against
    the test data, and their sMAPE is printed as a percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records)
    df = datasets.load_airline()
    # 36 months for testing
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # forecast horizon: array with the same length as y_test
    fh = np.arange(1, len(y_test) + 1)

    # (name used in the printout, label used in the plot, forecaster)
    # sp=12 months (yearly season). `damped` is passed as the boolean True;
    # the string 'True' only worked because any non-empty string is truthy.
    candidates = [
        ('NaiveForecaster', 'Naive Forecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing',
         ExponentialSmoothing(trend='add',
                              damped=True,
                              seasonal='multiplicative',
                              sp=12)),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing',
              ExponentialSmoothing(trend='add',
                                   damped=True,
                                   seasonal='multiplicative',
                                   sp=12)),
             ('Theta', ThetaForecaster(sp=12))
         ])),
    ]

    predictions = []
    for _, _, forecaster in candidates:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_ys(y_train,
            y_test,
            *predictions,
            labels=['Train', 'Test'] + [label for _, label, _ in candidates])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (name, _, _), y_pred in zip(candidates, predictions):
        # Scale before rounding so the printed percentage has no float noise.
        print(f'SMAPE Error for {name} is:',
              round(100 * smape_loss(y_test, y_pred), 1), '%')
from scipy.stats import gmean

from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()), ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights.

    A default ``EnsembleForecaster`` and one built with ``aggfunc="mean"`` and
    ``weights=[1, 1]`` are fitted on the same series with the same horizon;
    their predictions must be equal.
    """
    # Imported locally so the assertion does not depend on a module-level
    # pandas import that is not visible from this part of the file.
    import pandas as pd

    y = make_forecasting_problem()

    forecaster = EnsembleForecaster(forecasters)
    forecaster.fit(y, fh=[1, 2, 3])
    mean_pred = forecaster.predict()

    forecaster_1 = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    forecaster_1.fit(y, fh=[1, 2, 3])
    avg_pred = forecaster_1.predict()

    # Without this comparison the test could never fail on a wrong result.
    pd.testing.assert_series_equal(mean_pred, avg_pred)
import pandas as pd
import pytest

from sktime.forecasting.compose import ColumnEnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster


@pytest.mark.parametrize(
    "forecasters",
    [
        [
            ("trend", PolynomialTrendForecaster(), 0),
            ("naive", NaiveForecaster(), 1),
            ("ses", ExponentialSmoothing(), 2),
        ]
    ],
)
@pytest.mark.parametrize(
    "fh", [np.arange(1, stop) for stop in (11, 33, 3)]
)
def test_column_ensemble_shape(forecasters, fh):
    """Check that predictions have one row per horizon step and one column per input column.

    A three-column frame of random integers is fitted with one forecaster
    assigned to each column index (0, 1, 2).
    """
    n_rows, n_cols = 100, 3
    values = np.random.randint(0, 100, size=(n_rows, n_cols))
    y = pd.DataFrame(values, columns=["A", "B", "C"])

    ensemble = ColumnEnsembleForecaster(forecasters)
    ensemble.fit(y, fh=fh)
    actual = ensemble.predict()

    expected_shape = (len(fh), y.shape[1])
    assert actual.shape == expected_shape
def test_set_params():
    """Check that ``set_params`` is equivalent to passing parameters to the constructor.

    One forecaster receives ``trend="additive"`` at construction, the other
    through ``set_params`` after a default construction. Both are fitted on
    the module-level ``y_train`` with ``fh=1`` and must predict the same
    values.
    """
    params = {"trend": "additive"}

    via_constructor = ExponentialSmoothing(**params)
    via_constructor.fit(y_train, fh=1)
    expected = via_constructor.predict()

    via_setter = ExponentialSmoothing()
    via_setter.set_params(**params)
    via_setter.fit(y_train, fh=1)
    y_pred = via_setter.predict()

    assert_array_equal(y_pred, expected)
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformers.single_series.boxcox import BoxCoxTransformer
from sktime.transformers.single_series.detrend import ConditionalDeseasonalizer
from sktime.transformers.single_series.detrend import Detrender
from xgboost import XGBRegressor
from sktime.performance_metrics.forecasting import mase_loss
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.validation.forecasting import check_sp
from sktime.utils.validation.forecasting import check_y
from statsmodels.tsa.stattools import acf

SEASONAL_MODEL = "multiplicative"

ses = ExponentialSmoothing()
holt = ExponentialSmoothing(trend="add", damped=False)
damped = ExponentialSmoothing(trend="add", damped=True)


def M4_owa_loss(mase, smape, naive2_mase, naive2_smape):
    """Overall weighted average (OWA) of sMAPE and MASE, as used in the M4 competition.

    Each metric is expressed relative to the Naive2 benchmark (NaN-ignoring
    mean of the model's values divided by the plain mean of the benchmark's
    values) and the two ratios are averaged.

    Parameters
    ----------
    mase, smape : array-like
        Losses of the evaluated method; NaN entries are ignored.
    naive2_mase, naive2_smape : array-like
        Losses of the Naive2 benchmark.

    Returns
    -------
    float
        Mean of the relative sMAPE and the relative MASE.

    References
    ----------
    ..[1] https://github.com/Mcompetitions/M4-methods/blob/master/Benchmarks%20and%20Evaluation.R
    """
    relative_smape = np.nanmean(smape) / np.mean(naive2_smape)
    relative_mase = np.nanmean(mase) / np.mean(naive2_mase)
    return (relative_smape + relative_mase) / 2
SeriesToSeriesRowTransformer(SERIES_TO_SERIES_TRANSFORMER, check_transformer=False), ), ( "transformer2", SeriesToSeriesRowTransformer(SERIES_TO_SERIES_TRANSFORMER, check_transformer=False), ), ] REGRESSOR = LinearRegression() TIME_SERIES_CLASSIFIER = TimeSeriesForest(n_estimators=3) TIME_SERIES_CLASSIFIERS = [ ("tsf1", TIME_SERIES_CLASSIFIER), ("tsf2", TIME_SERIES_CLASSIFIER), ] FORECASTER = ExponentialSmoothing() FORECASTERS = [("ses1", FORECASTER), ("ses2", FORECASTER)] STEPS = [ ("transformer", Detrender(ThetaForecaster())), ("forecaster", NaiveForecaster()), ] ESTIMATOR_TEST_PARAMS = { OnlineEnsembleForecaster: { "forecasters": FORECASTERS }, FeatureUnion: { "transformer_list": TRANSFORMERS }, DirectRegressionForecaster: { "regressor": REGRESSOR },
if uploaded_file is not None: df, df2 = load_data(uploaded_file) # prepare models models = [] models.append(('LR', LinearRegression())) models.append(('KNN', KNeighborsRegressor())) models.append(('RF', RandomForestRegressor())) models.append(('GB', GradientBoostingRegressor())) models.append(('XGBoost', XGBRegressor(verbosity = 0))) models.append(('SVM', LinearSVR())) models.append(('Extra Trees', ExtraTreesRegressor())) models.append(('Naive', NaiveForecaster(strategy="last", sp=12))) models.append(('Theta', ThetaForecaster(sp=12))) models.append(('Exp_Smoothing', ExponentialSmoothing(trend="add", seasonal="additive", sp=12))) models.append(('TBATS', TBATS(sp=12, use_trend=True, use_box_cox=False))) forecast_horizon = st.sidebar.slider(label = 'Forecast Length (months)',min_value = 3, max_value = 36, value = 12) window_length = st.sidebar.slider(label = 'Sliding Window Length ',min_value = 1, value = 12) # evaluate each model in turn results1 = [] names = [] dn_forecast = [] dn_test =[] for name, model in models: if name == 'LR' or name == 'KNN' or name == 'RF' or name == 'GB' or name == 'XGBoost' or name == 'SVM' or name == 'Extra Trees': forecaster = ReducedRegressionForecaster(regressor=model, window_length=window_length,strategy='recursive') else: