def _transform(self, X, y=None):
    """Compute the Theta-line(s) of X.

    Private _transform containing the core logic, called from transform.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame
        Data to be transformed.
    y : ignored argument for interface compatibility
        Additional data, e.g., labels for transformation.

    Returns
    -------
    theta_lines : pd.Series or pd.DataFrame
        pd.Series with a single Theta-line if ``self.theta`` is a float or
        an int; otherwise a pd.DataFrame of shape
        ``[len(X), len(self.theta)]`` whose columns are ``self.theta``.
    """
    coefficients = _check_theta(self.theta)

    # Fit a default PolynomialTrendForecaster and evaluate it in-sample,
    # i.e. on the absolute time points of X itself.
    trend_model = PolynomialTrendForecaster()
    trend_model.fit(X)
    in_sample_fh = ForecastingHorizon(X.index, is_relative=False)
    trend = trend_model.predict(in_sample_fh)

    # One output column per theta coefficient.
    lines = np.zeros((X.shape[0], len(coefficients)))
    for column, coefficient in enumerate(coefficients):
        lines[:, column] = _theta_transform(X, trend, coefficient)

    if not isinstance(self.theta, (float, int)):
        return pd.DataFrame(lines, columns=self.theta, index=X.index)
    return pd.Series(lines.flatten(), index=X.index)
def transform(self, Z, X=None):
    """Transform a univariate series into its Theta-line(s).

    Parameters
    ----------
    Z : pd.Series
        Series to transform.
    X : pd.DataFrame, optional (default=None)
        Exogenous data; not used by this method.

    Returns
    -------
    theta_lines : pd.Series or pd.DataFrame
        pd.Series with a single Theta-line if ``self.theta`` is a float or
        an int; otherwise a pd.DataFrame with ``len(Z)`` rows and one
        column per entry of ``self.theta``.
    """
    self.check_is_fitted()
    series = check_series(Z, enforce_univariate=True)
    coefficients = _check_theta(self.theta)

    # Fit a default PolynomialTrendForecaster and evaluate it in-sample,
    # i.e. on the absolute time points of the series itself.
    trend_model = PolynomialTrendForecaster()
    trend_model.fit(series)
    in_sample_fh = ForecastingHorizon(series.index, is_relative=False)
    trend = trend_model.predict(in_sample_fh)

    # One output column per theta coefficient.
    lines = np.zeros((series.shape[0], len(coefficients)))
    for column, coefficient in enumerate(coefficients):
        lines[:, column] = _theta_transform(series, trend, coefficient)

    if not isinstance(self.theta, (float, int)):
        return pd.DataFrame(lines, columns=self.theta, index=series.index)
    return pd.Series(lines.flatten(), index=series.index)
def learn(series_data):
    """Fit a deseasonalise -> detrend -> polynomial-trend pipeline.

    Parameters
    ----------
    series_data : sliceable series
        Observations to learn from; the last two entries are excluded
        from fitting.

    Returns
    -------
    TransformedTargetForecaster
        The pipeline after ``fit`` has been called on it.
    """
    steps = [
        ("deseasonalise", Deseasonalizer(model="multiplicative", sp=7)),
        ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=4))),
        ("forecast", PolynomialTrendForecaster(degree=4)),
    ]
    pipeline = TransformedTargetForecaster(steps)

    training_part = series_data[:-2]
    pipeline.fit(training_part)
    return pipeline
def test_constant_trend():
    """Check that a degree-1 trend reproduces the series 0..29 in-sample."""
    n_timepoints = 30
    y = pd.Series(np.arange(n_timepoints))

    # Non-positive relative steps address the training period (in-sample).
    in_sample_fh = -np.arange(n_timepoints)

    fitted = PolynomialTrendForecaster(degree=1).fit(y)
    y_pred = fitted.predict(in_sample_fh)

    np.testing.assert_array_almost_equal(y, y_pred)
def check_trend(degree, with_intercept):
    """Compare fitted trend coefficients on the airline data with expected ones.

    Parameters
    ----------
    degree : int
        Polynomial degree handed to PolynomialTrendForecaster.
    with_intercept : bool
        Intercept flag handed to PolynomialTrendForecaster.
    """
    y = load_airline()

    forecaster = PolynomialTrendForecaster(
        degree=degree, with_intercept=with_intercept
    )
    forecaster.fit(y)

    # The last pipeline step holds the fitted regressor; its coefficients
    # are reversed because the intercept is added in reverse order.
    last_step = forecaster.regressor_.steps[-1]
    actual = last_step[1].coef_[::-1]

    expected = compute_expected_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(actual, expected)
def _test_trend(degree, with_intercept):
    """Compare fitted trend coefficients on generated data with expected ones.

    Parameters
    ----------
    degree : int
        Polynomial degree handed to PolynomialTrendForecaster.
    with_intercept : bool
        Intercept flag handed to PolynomialTrendForecaster.
    """
    y = make_forecasting_problem()

    model = PolynomialTrendForecaster(degree=degree, with_intercept=with_intercept)
    model.fit(y)

    # The last pipeline step holds the fitted regressor; its coefficients
    # are reversed because the intercept is added in reverse order.
    final_step = model.regressor_.steps[-1]
    fitted_coefs = final_step[1].coef_[::-1]

    reference_coefs = get_expected_polynomial_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(fitted_coefs, reference_coefs)
def transform(self, Z, X=None):
    """Impute missing values of a univariate series.

    Parameters
    ----------
    Z : pd.Series
        Series to impute.
    X : optional (default=None)
        Not used by this method.

    Returns
    -------
    z : pd.Series
        Transformed time series.

    Raises
    ------
    ValueError
        If ``self.method`` is none of the strategies handled below.
    """
    self.check_is_fitted()
    self._check_method()
    series = check_series(Z, enforce_univariate=True)

    # Replace the configured placeholder with np.nan so every strategy
    # below only has to deal with NaN.
    if self.missing_values:
        series = series.replace(to_replace=self.missing_values, value=np.nan)

    method = self.method

    if method == "random":
        return series.apply(
            lambda value: self._get_random(series) if np.isnan(value) else value
        )

    if method == "constant":
        return series.fillna(value=self.value)

    if method in ("backfill", "bfill", "pad", "ffill"):
        return series.fillna(method=method)

    if method in ("drift", "forecaster"):
        if method == "forecaster":
            trend_model = self.forecaster
        else:
            trend_model = PolynomialTrendForecaster(degree=1)

        # In-sample forecasting horizon.
        fh_ins = -np.arange(len(series))

        # Heuristic: the model is fitted on a forward/backward-filled copy,
        # while the gaps of the original series receive the predictions.
        gap_free = series.fillna(method="ffill").fillna(method="backfill")
        predictions = trend_model.fit(gap_free).predict(fh=fh_ins)
        return series.fillna(value=predictions)

    if method == "mean":
        return series.fillna(value=series.mean())

    if method == "median":
        return series.fillna(value=series.median())

    if method in ("nearest", "linear"):
        return series.interpolate(method=method)

    raise ValueError(f"method {self.method} not available")
def _fit(self, X, y=None):
    """Fit one trend forecaster per series in X.

    Private _fit containing the core logic, called from fit.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame
        Data to fit the transformer to. For a pd.DataFrame a separate
        forecaster is fitted for each column.
    y : pd.DataFrame, default=None
        Additional data; handed to the forecaster as its exogenous data.

    Returns
    -------
    self : a fitted instance of the estimator
    """
    if self.forecaster is None:
        self.forecaster = PolynomialTrendForecaster(degree=1)

    # multivariate: one independently cloned forecaster per column
    if isinstance(X, pd.DataFrame):
        self.forecaster_ = {}
        for colname in X.columns:
            forecaster = clone(self.forecaster)
            self.forecaster_[colname] = forecaster.fit(X[colname], y)
    # univariate
    else:
        forecaster = clone(self.forecaster)
        # The transformer's ``y`` is the forecaster's exogenous data.
        # Previously the target series itself was passed in that slot,
        # so the forecaster was given the target as its own regressor.
        self.forecaster_ = forecaster.fit(X, y)
    return self
def test_polynomial_detrending():
    """Check coefficients, trend and residuals of a linear Detrender."""
    import numpy as np
    import pandas as pd

    from sktime.forecasting.tests.test_trend import get_expected_polynomial_coefs
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.transformations.series.detrend import Detrender

    # Noisy line with slope 0.5 over 20 time points.
    y = pd.Series(np.arange(20) * 0.5) + np.random.normal(0, 1, size=20)
    n_timepoints = len(y)

    transformer = Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True))
    transformer.fit(y)
    fitted_forecaster = transformer.forecaster_

    # check coefficients
    reference = get_expected_polynomial_coefs(y, degree=1, with_intercept=True)
    expected_coefs = reference[::-1]
    actual_coefs = fitted_forecaster.regressor_.steps[-1][-1].coef_
    np.testing.assert_array_almost_equal(actual_coefs, expected_coefs)

    # check trend
    time_steps = np.arange(n_timepoints)
    expected_trend = expected_coefs[0] + time_steps * expected_coefs[1]
    actual_trend = fitted_forecaster.predict(-np.arange(n_timepoints))
    np.testing.assert_array_almost_equal(actual_trend, expected_trend)

    # check residuals
    expected_residuals = y - expected_trend
    actual_residuals = transformer.transform(y)
    np.testing.assert_array_almost_equal(actual_residuals, expected_residuals)
def _fit(self, X, y=None):
    """Fit one trend forecaster per series in X.

    Private _fit containing the core logic, called from fit.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame
        Data to fit the transformer to. For a pd.DataFrame a separate
        forecaster is fitted for each column.
    y : pd.DataFrame, default=None
        Additional data; handed to the forecaster as its exogenous data.

    Returns
    -------
    self : a fitted instance of the estimator

    Raises
    ------
    TypeError
        If X is neither a pd.Series nor a pd.DataFrame.
    """
    if self.forecaster is None:
        self.forecaster = PolynomialTrendForecaster(degree=1)

    if not isinstance(X, (pd.Series, pd.DataFrame)):
        raise TypeError("X must be pd.Series or pd.DataFrame")

    # Note the argument swap: the transformer's X is the forecaster's
    # target y, and the transformer's y is the forecaster's exogenous X.
    if isinstance(X, pd.Series):
        # univariate
        template = clone(self.forecaster)
        self.forecaster_ = template.fit(y=X, X=y)
    else:
        # multivariate: one independently cloned forecaster per column
        self.forecaster_ = {}
        for colname in X.columns:
            template = clone(self.forecaster)
            self.forecaster_[colname] = template.fit(y=X[colname], X=y)

    return self
def test_pipeline():
    """Pipeline predictions must equal the manually chained transformations."""

    def compute_expected_y_pred(y_train, fh):
        """Replicate the pipeline by hand: transform, forecast, invert."""
        # fitting
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        detrender = Detrender(PolynomialTrendForecaster(degree=1))
        yt = y_train.copy()
        yt = deseasonalizer.fit_transform(yt)
        yt = detrender.fit_transform(yt)
        naive = NaiveForecaster()
        naive.fit(yt, fh=fh)

        # predicting: undo the transformations in reverse order
        y_pred = naive.predict()
        y_pred = detrender.inverse_transform(y_pred)
        y_pred = deseasonalizer.inverse_transform(y_pred)
        return y_pred

    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    pipeline = TransformedTargetForecaster(
        [
            ("t1", Deseasonalizer(sp=12, model="multiplicative")),
            ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
            ("forecaster", NaiveForecaster()),
        ]
    )

    # Relative horizon 1..len(y_test).
    fh = np.arange(len(y_test)) + 1
    pipeline.fit(y_train, fh=fh)
    actual = pipeline.predict()

    expected = compute_expected_y_pred(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
def fit(self, Z, X=None):
    """Compute the trend in the series.

    Parameters
    ----------
    Z : pd.Series or pd.DataFrame
        Endogenous time series to fit a trend to. For a pd.DataFrame a
        separate forecaster is fitted for each column.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables, handed to every forecaster fit.

    Returns
    -------
    self : an instance of self
    """
    # Stay flagged as unfitted until everything below has succeeded.
    self._is_fitted = False

    series = check_series(Z)

    if self.forecaster is None:
        self.forecaster = PolynomialTrendForecaster(degree=1)

    if not isinstance(series, pd.DataFrame):
        # univariate
        template = clone(self.forecaster)
        self.forecaster_ = template.fit(series, X)
    else:
        # multivariate: one independently cloned forecaster per column
        self.forecaster_ = {}
        for colname in series.columns:
            template = clone(self.forecaster)
            self.forecaster_[colname] = template.fit(series[colname], X)

    self._is_fitted = True
    return self
def _transform(self, X, y=None):
    """Impute the missing values of X.

    Private _transform containing the core logic, called from transform.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame
        Data to be transformed; it is copied before any change is made.
    y : ignored argument for interface compatibility
        Additional data, e.g., labels for transformation.

    Returns
    -------
    Z : pd.Series or pd.DataFrame, same type as X
        Transformed version of X.

    Raises
    ------
    ValueError
        If ``self.method`` is none of the strategies handled below.
    """
    self._check_method()
    Z = X.copy()

    # Replace the configured placeholder with np.nan.
    if self.missing_values:
        Z = Z.replace(to_replace=self.missing_values, value=np.nan)

    # Nothing to impute.
    if not _has_missing_values(Z):
        return Z

    def _fill_random(column):
        # Swap every NaN of ``column`` for a value from self._get_random.
        return column.apply(
            lambda entry: self._get_random(column) if np.isnan(entry) else entry
        )

    method = self.method
    if method == "random":
        if isinstance(Z, pd.DataFrame):
            for col in Z:
                Z[col] = _fill_random(Z[col])
        else:
            Z = _fill_random(Z)
    elif method == "constant":
        Z = Z.fillna(value=self.value)
    elif method in ("backfill", "bfill", "pad", "ffill"):
        Z = Z.fillna(method=method)
    elif method == "drift":
        Z = _impute_with_forecaster(PolynomialTrendForecaster(degree=1), Z)
    elif method == "forecaster":
        Z = _impute_with_forecaster(clone(self.forecaster), Z)
    elif method == "mean":
        Z = Z.fillna(value=Z.mean())
    elif method == "median":
        Z = Z.fillna(value=Z.median())
    elif method in ("nearest", "linear"):
        Z = Z.interpolate(method=method)
    else:
        raise ValueError(f"`method`: {self.method} not available.")

    # Fill first/last elements of the series, as some methods
    # (e.g. "linear") cannot impute those.
    return Z.fillna(method="ffill").fillna(method="backfill")
def transform(self, Z, X=None):
    """Impute the missing values of Z.

    Parameters
    ----------
    Z : pd.Series, pd.DataFrame
        Series to impute; it is copied before any change is made.
    X : optional (default=None)
        Not used by this method.

    Returns
    -------
    Z : pd.Series, pd.DataFrame
        Transformed time series(es).

    Raises
    ------
    ValueError
        If ``self.method`` is none of the strategies handled below.
    """
    self.check_is_fitted()
    self._check_method()
    Z = check_series(Z).copy()

    # Replace the configured placeholder with np.nan.
    if self.missing_values:
        Z = Z.replace(to_replace=self.missing_values, value=np.nan)

    # Nothing to impute.
    if not _has_missing_values(Z):
        return Z

    strategy = self.method
    if strategy == "random":
        if isinstance(Z, pd.DataFrame):
            for col in Z:
                source = Z[col]
                Z[col] = source.apply(
                    lambda item, source=source: (
                        self._get_random(source) if np.isnan(item) else item
                    )
                )
        else:
            source = Z
            Z = source.apply(
                lambda item: self._get_random(source) if np.isnan(item) else item
            )
    elif strategy == "constant":
        Z = Z.fillna(value=self.value)
    elif strategy in ("backfill", "bfill", "pad", "ffill"):
        Z = Z.fillna(method=strategy)
    elif strategy == "drift":
        linear_trend = PolynomialTrendForecaster(degree=1)
        Z = _impute_with_forecaster(linear_trend, Z)
    elif strategy == "forecaster":
        user_model = clone(self.forecaster)
        Z = _impute_with_forecaster(user_model, Z)
    elif strategy == "mean":
        Z = Z.fillna(value=Z.mean())
    elif strategy == "median":
        Z = Z.fillna(value=Z.median())
    elif strategy in ("nearest", "linear"):
        Z = Z.interpolate(method=strategy)
    else:
        raise ValueError(f"`method`: {self.method} not available.")

    # Fill first/last elements of the series, as some methods
    # (e.g. "linear") cannot impute those.
    Z = Z.fillna(method="ffill")
    Z = Z.fillna(method="backfill")
    return Z
def test_linear_detrending():
    """Detrending the airline data with a linear trend matches the reference."""
    y = load_airline()

    trend_forecaster = PolynomialTrendForecaster(degree=1, with_intercept=True)
    detrender = Detrender(trend_forecaster)
    actual = detrender.fit_transform(y)

    expected = compute_expected_detrend(y, 1, with_intercept=True)
    np.testing.assert_allclose(actual, expected)
def compute_expected_y_pred(y_train, fh):
    """Chain deseasonalising, detrending and naive forecasting by hand.

    Parameters
    ----------
    y_train : series
        Training data; a copy is transformed, the argument is not reassigned.
    fh : forecasting horizon
        Passed to ``NaiveForecaster.fit``.

    Returns
    -------
    y_pred
        Naive forecasts mapped back through both inverse transformations.
    """
    deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
    detrender = Detrender(PolynomialTrendForecaster(degree=1))

    # fitting: apply the transformations in pipeline order
    transformed = y_train.copy()
    transformed = deseasonalizer.fit_transform(transformed)
    transformed = detrender.fit_transform(transformed)

    naive = NaiveForecaster()
    naive.fit(transformed, fh=fh)

    # predicting: undo the transformations in reverse order
    prediction = naive.predict()
    prediction = detrender.inverse_transform(prediction)
    prediction = deseasonalizer.inverse_transform(prediction)
    return prediction
def fit(self, Z, X=None):
    """Compute the trend in the series.

    Parameters
    ----------
    Z : pd.Series
        Endogenous (univariate) time series to fit a trend to.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables, handed to the forecaster fit.

    Returns
    -------
    self : an instance of self
    """
    series = check_series(Z, enforce_univariate=True)

    if self.forecaster is None:
        self.forecaster = PolynomialTrendForecaster(degree=1)

    # Fit a clone so the forecaster stored on ``self.forecaster`` is not
    # the object being fitted.
    template = clone(self.forecaster)
    self.forecaster_ = template.fit(series, X)

    self._is_fitted = True
    return self
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred)) st.write("gscv.best_params_:", gscv.best_params_) st.write(''' * Detrending 请注意,到目前为止,上述减少方法并未考虑任何季节或趋势,但我们可以轻松地指定首先对数据进行趋势去除的管道。 sktime提供了一个通用的去趋势器,它是一个使用任何预测器并返回预测器预测值的样本内残差的转换器。 例如,要删除时间序列的线性趋势,我们可以写成 ''') from sktime.forecasting.trend import PolynomialTrendForecaster from sktime.transformers.single_series.detrend import Detrender # liner detrending forecaster = PolynomialTrendForecaster(degree=1) transformer = Detrender(forecaster=forecaster) yt = transformer.fit_transform(y_train) # internally, the Detrender uses the in-sample predictions of the PolynomialTrendForecaster forecaster = PolynomialTrendForecaster(degree=1) fh_ins = -np.arange(len(y_train)) # in-sample forecasting horizon y_pred = forecaster.fit(y_train).predict(fh=fh_ins) plot_ys(y_train, y_pred, yt, labels=["y_train", "Fitted linear trend", "Residuals"]) st.pyplot() st.write(''' * Pipelining 让我们在管道中使用**去趋势剂**和**去季节化**。
best_idx = gscv.best_index_ assert best_idx == actual.argmin() best_params = gscv.best_params_ assert best_params == param_grid[best_idx] # Check if best parameters are contained in best forecaster. best_forecaster_params = gscv.best_forecaster_.get_params() best_params = gscv.best_params_ assert best_params.items() <= best_forecaster_params.items() NAIVE = NaiveForecaster(strategy="mean") NAIVE_GRID = {"window_length": TEST_WINDOW_LENGTHS} PIPE = TransformedTargetForecaster([ ("transformer", Detrender(PolynomialTrendForecaster())), ("forecaster", ARIMA()), ]) PIPE_GRID = { "transformer__forecaster__degree": [1, 2], "forecaster__with_intercept": [True, False], } CVs = [ *[SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS], SlidingWindowSplitter(fh=1, initial_window=15), ] @pytest.mark.parametrize("forecaster, param_grid", [(NAIVE, NAIVE_GRID), (PIPE, PIPE_GRID)]) @pytest.mark.parametrize("scoring", TEST_METRICS)
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.compose._ensemble import VALID_AGG_FUNCS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()), ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    # Ensemble built with default settings.
    default_ensemble = EnsembleForecaster(forecasters)
    default_ensemble.fit(y, fh=horizon)
    mean_pred = default_ensemble.predict()

    # Ensemble with explicit "mean" aggregation and equal weights.
    weighted_ensemble = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    weighted_ensemble.fit(y, fh=horizon)
    avg_pred = weighted_ensemble.predict()

    pd.testing.assert_series_equal(mean_pred, avg_pred)
import pytest
import sys
from scipy.stats import gmean
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()), ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    # Ensemble built with default settings.
    default_ensemble = EnsembleForecaster(forecasters)
    default_ensemble.fit(y, fh=horizon)
    mean_pred = default_ensemble.predict()

    # Ensemble with explicit "mean" aggregation and equal weights.
    # NOTE(review): no prediction from this second ensemble and no assertion
    # are visible here; the test looks truncated - confirm against the
    # full file.
    weighted_ensemble = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    weighted_ensemble.fit(y, fh=horizon)
def transform(self, Z, X=None):
    """Impute the missing values of Z.

    Parameters
    ----------
    Z : pd.Series, pd.DataFrame
        Series to impute; it is copied before any change is made.
    X : optional (default=None)
        Not used by this method.

    Returns
    -------
    Z : pd.Series, pd.DataFrame
        Transformed time series(es).

    Raises
    ------
    ValueError
        If ``self.method`` is none of the strategies handled below.
    """
    self.check_is_fitted()
    self._check_method()
    # Work on a copy: the per-column assignments below would otherwise
    # write into the caller's DataFrame.
    Z = check_series(Z).copy()

    # replace missing_values with np.nan
    if self.missing_values:
        Z = Z.replace(to_replace=self.missing_values, value=np.nan)

    if self.method == "random":
        if isinstance(Z, pd.DataFrame):
            for col in Z:
                Z[col] = Z[col].apply(
                    lambda i: self._get_random(Z[col]) if np.isnan(i) else i
                )
        else:
            Z = Z.apply(lambda i: self._get_random(Z) if np.isnan(i) else i)
    elif self.method == "constant":
        Z = Z.fillna(value=self.value)
    elif self.method in ["backfill", "bfill", "pad", "ffill"]:
        Z = Z.fillna(method=self.method)
    elif self.method in ["drift", "forecaster"]:
        if self.method == "forecaster":
            forecaster = self.forecaster
        else:
            forecaster = PolynomialTrendForecaster(degree=1)

        # in-sample forecasting horizon
        fh_ins = -np.arange(len(Z))

        # Heuristic: fit on a forward/backward-filled copy. Z itself must
        # keep its NaNs here; overwriting Z with the filled data would
        # leave nothing for the forecaster predictions to fill.
        Z_filled = Z.fillna(method="ffill").fillna(method="backfill")

        # multivariate
        if isinstance(Z, pd.DataFrame):
            for col in Z:
                forecaster.fit(y=Z_filled[col])
                Z_pred = forecaster.predict(fh=fh_ins)
                Z[col] = Z[col].fillna(value=Z_pred)
        # univariate
        else:
            forecaster.fit(y=Z_filled)
            Z_pred = forecaster.predict(fh=fh_ins)
            Z = Z.fillna(value=Z_pred)
    elif self.method == "mean":
        Z = Z.fillna(value=Z.mean())
    elif self.method == "median":
        Z = Z.fillna(value=Z.median())
    elif self.method in ["nearest", "linear"]:
        Z = Z.interpolate(method=self.method)
    else:
        raise ValueError(f"method {self.method} not available")

    # fill first/last elements of series,
    # as some methods (e.g. "linear") cant impute those
    Z = Z.fillna(method="ffill").fillna(method="backfill")
    return Z
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import ColumnEnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster


@pytest.mark.parametrize(
    "forecasters",
    [
        [
            ("trend", PolynomialTrendForecaster(), 0),
            ("naive", NaiveForecaster(), 1),
            ("ses", ExponentialSmoothing(), 2),
        ]
    ],
)
@pytest.mark.parametrize(
    "fh", [np.arange(1, 11), np.arange(1, 33), np.arange(1, 3)]
)
def test_column_ensemble_shape(forecasters, fh):
    """Check the shape of the returned prediction."""
    # 100 rows of random integers in three columns "A", "B", "C".
    values = np.random.randint(0, 100, size=(100, 3))
    y = pd.DataFrame(values, columns=list("ABC"))

    ensemble = ColumnEnsembleForecaster(forecasters)
    ensemble.fit(y, fh=fh)
    prediction = ensemble.predict()

    # One row per horizon step, one column per input column.
    expected_shape = (len(fh), y.shape[1])
    assert prediction.shape == expected_shape
y_test_subset = y_test.loc[ y_pred.index ] # select only time points which we predicted scores[i] = scoring(y_test_subset, y_pred) return scores @pytest.mark.parametrize( "forecaster, param_dict", [ (NaiveForecaster(strategy="mean"), {"window_length": TEST_WINDOW_LENGTHS}), # atomic estimator ( TransformedTargetForecaster( [ # composite estimator ("t", Detrender(PolynomialTrendForecaster())), ("f", ReducedForecaster(LinearRegression(), scitype="regressor")), ] ), { "f__window_length": TEST_WINDOW_LENGTHS, "f__step_length": TEST_STEP_LENGTHS, }, ), # multiple params ], ) @pytest.mark.parametrize( "scoring", [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)], ) @pytest.mark.parametrize(
pd.DataFrame(gscv.cv_results_) # ## Pipelines # In[86]: from sktime.forecasting.compose import TransformedTargetForecaster from sktime.transformers.single_series.detrend import Detrender, Deseasonalizer from sktime.forecasting.trend import PolynomialTrendForecaster # ### Detrending # In[126]: model = PolynomialTrendForecaster(degree=1) transformer = Detrender(model) yt = transformer.fit_transform(train) trendline = model.fit(train).predict(fh=-np.arange(len(train))) plot_ys(train, trendline, yt, labels=['series', 'trend', 'detrended']) # ### Pipelining # In[130]: forecaster = TransformedTargetForecaster([ ("deseasonalise", Deseasonalizer(model="multiplicative", sp=12)), ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=1))), ("forecast",
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Main function for build forecasting model on selected customer and time
    interval, save the model and plotting

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp
    customer_id: int
    start: string
        start year and month in '2020-01' format
    end: string
        end year and month in '2020-01' format
        *** this month will not be included ***
    model_type: type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster',
        'ThetaForecaster', 'KNeighborsRegressor', 'ExponentialSmoothing',
        'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']
    test_size_month:
        number of month that will be excluded from end of interval to use
        as test dataset
    model_storage_path: string
        the folder that you want to store saved models; an empty string
        stores the model relative to the current working directory

    Returns
    -------
    plot:
        the return value of ``plot_series`` for train, test and predicted
        values; the sMAPE loss is printed, not returned

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    import os

    # Resolve the model first so that an unsupported name fails right away
    # with a clear error instead of after the data has been prepared.
    forecaster = _build_forecaster(model_type)

    series = prepare_data(data, customer_id, start=start, end=end)
    y_train, y_test = temporal_train_test_split(series, test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on data shifted up by one.
        # NOTE(review): presumably a workaround for models that reject
        # non-positive values; the predictions then refer to the shifted
        # series - confirm this is intended.
        forecaster.fit(y_train + 1)
    y_pred = forecaster.predict(fh)

    # os.path.join keeps an empty storage path relative; plain string
    # formatting turned '' into a file at the filesystem root.
    file_name = f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    dump(forecaster, os.path.join(model_storage_path, file_name))

    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot


def _build_forecaster(model_type):
    """Return an unfitted forecaster for ``model_type``; ValueError if unknown."""
    # Factories are lambdas so only the requested model is constructed.
    builders = {
        'NaiveForecaster':
        lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
        lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
        lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
        lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive"),
        'ExponentialSmoothing':
        lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12),
        'AutoETS':
        lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
        lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
        lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
        lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
        lambda: EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ]),
    }
    try:
        build = builders[model_type]
    except KeyError:
        supported = ", ".join(builders)
        raise ValueError(
            f"Unknown model_type {model_type!r}; choose one of: {supported}"
        ) from None
    return build()
def genforecast(data):
    """Back-test four forecasters on ``data`` and forecast five steps ahead.

    The data is split with ``temporal_train_test_split``. Each model
    (naive drift, linear trend, exponential-smoothing ensemble,
    grid-searched KNN reduction) is fitted on the training part and scored
    with sMAPE on the test part. It is then refitted on all of ``data``
    and asked for the next five steps.

    Parameters
    ----------
    data : series
        The full series to split, back-test on and forecast from.

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)``. ``min_*``/``max_*`` are the rounded extremes of the
        five-step forecast, ``y_pred_*`` the test-period predictions,
        ``*_acc`` the rounded sMAPE, and ``testct`` the test set length.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster

    y_train, y_test = temporal_train_test_split(data)
    testct = len(y_test)
    fh = np.arange(1, testct + 1)
    futurewin = np.arange(1, 6)  # 5 day in future prediction

    def run(model):
        return _score_and_forecast(model, y_train, y_test, data, fh, futurewin)

    # Naive drift baseline.
    y_pred_naive, naive_acc, min_naive, max_naive = run(
        NaiveForecaster(strategy="drift"))

    # Linear trend.
    y_pred_poly, poly_acc, min_poly, max_poly = run(
        PolynomialTrendForecaster(degree=1))

    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(math.floor(testct / 4), 12))
    # NOTE(review): `damped` is kept exactly as written; other sktime code
    # spells this keyword `damped_trend` - confirm against the installed
    # version.
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval)),
    ])
    y_pred_ensem, ensem_acc, min_ensem, max_ensem = run(ensemble)

    # KNN reduction: fit on the initial window, then use temporal
    # cross-validation to find the optimal window length.
    reducer = ReducedRegressionForecaster(
        regressor=KNeighborsRegressor(n_neighbors=1),
        window_length=15,
        strategy="recursive")
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(reducer,
                                   cv=cv,
                                   param_grid={"window_length": [5, 10, 15]})
    y_pred_redreg, redreg_acc, min_redreg, max_redreg = run(gscv)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)


def _score_and_forecast(model, y_train, y_test, data, fh, future_fh):
    """Back-test ``model`` on the hold-out, then refit on all data and forecast.

    Returns ``(y_pred, score, low, high)``: the test-period predictions,
    the sMAPE rounded to 4 decimals, and the minimum and maximum of the
    ``future_fh`` forecast rounded to 2 decimals.
    """
    from sktime.performance_metrics.forecasting import smape_loss

    model.fit(y_train)
    y_pred = model.predict(fh)
    score = round(smape_loss(y_test, y_pred), 4)

    # full model dev and forecast beyond the end of the data
    model.fit(data)
    future = model.predict(future_fh)
    return y_pred, score, round(min(future), 2), round(max(future), 2)