def forecasting_autoarima(y_train, y_test, s):
    """Fit AutoARIMA on ``y_train`` and render the forecast in Streamlit.

    The forecast covers steps ``1 .. len(y_test)`` ahead of the training
    data, so it has one value per observation in ``y_test``.

    Parameters
    ----------
    y_train : series the forecaster is fitted on
    y_test : held-out series; only its length is used to size the horizon,
        and it is drawn next to the forecast
    s : seasonal period, forwarded to ``AutoARIMA`` as ``sp``
    """
    steps_ahead = np.arange(1, len(y_test) + 1)

    model = AutoARIMA(sp=s)
    model.fit(y_train)
    prediction = model.predict(steps_ahead)

    series_labels = ["y_train", "y_test", "y_pred"]
    plot_ys(y_train, y_test, prediction, labels=series_labels)
    # Called without arguments, as in the rest of this file.
    st.pyplot()
def test_multiplex_or_dunder():
    """Check that the ``|`` operator builds a MultiplexForecaster.

    The operator is exercised on forecaster | forecaster,
    MultiplexForecaster | MultiplexForecaster and
    MultiplexForecaster | forecaster, and must raise ``TypeError`` when the
    right-hand operand is not a forecaster.
    """
    # forecaster | forecaster
    pair = AutoETS() | NaiveForecaster()
    assert isinstance(pair, MultiplexForecaster)
    assert len(pair.forecasters) == 2

    # MultiplexForecaster | MultiplexForecaster
    multiplex_one = MultiplexForecaster(
        [("arima", AutoARIMA()), ("ets", AutoETS())]
    )
    multiplex_two = MultiplexForecaster(
        [("theta", ThetaForecaster()), ("naive", NaiveForecaster())]
    )
    merged = multiplex_one | multiplex_two
    assert isinstance(merged, MultiplexForecaster)
    assert len(merged.forecasters) == 4

    # Three forecasters of the same class: covers MultiplexForecaster |
    # forecaster and checks that the generated estimator names are unique.
    naive_variants = (
        NaiveForecaster(strategy="last")
        | NaiveForecaster(strategy="mean")
        | NaiveForecaster(strategy="drift")
    )
    assert isinstance(naive_variants, MultiplexForecaster)
    assert len(naive_variants.forecasters) == 3
    estimator_names = naive_variants._get_estimator_names(
        naive_variants.forecasters
    )
    assert len(set(estimator_names)) == 3

    # Any other right-hand operand must be rejected with a TypeError.
    with pytest.raises(TypeError):
        multiplex_one | "this shouldn't work"
def construct_M4_forecasters(sp, fh):
    """Return the mapping of benchmark names to unfitted forecasters.

    Parameters
    ----------
    sp : seasonal period. When it is greater than 1 the deseasonalising
        steps receive ``model=SEASONAL_MODEL`` and ``sp=sp``; otherwise they
        are built with their defaults.
    fh : accepted for signature compatibility; not used in this function.

    Returns
    -------
    dict
        Forecasters keyed by name, in the insertion order shown below.
    """
    if sp > 1:
        seasonal_kwargs = {"model": SEASONAL_MODEL, "sp": sp}
    else:
        seasonal_kwargs = {}

    theta_bc = make_pipeline(
        ConditionalDeseasonalizer(
            seasonality_test=seasonality_test_R, **seasonal_kwargs
        ),
        BoxCoxTransformer(bounds=(0, 1)),
        ThetaForecaster(deseasonalise=False),
    )

    # Disabled pipelines, kept for reference:
    #
    # MLP = make_pipeline(
    #     ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
    #                               **kwargs),
    #     Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
    #     RecursiveRegressionForecaster(
    #         regressor=MLPRegressor(hidden_layer_sizes=6,
    #                                activation="identity", solver="adam",
    #                                max_iter=100, learning_rate="adaptive",
    #                                learning_rate_init=0.001),
    #         window_length=3)
    # )
    # RNN = make_pipeline(
    #     ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
    #                               **kwargs),
    #     Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
    #     RecursiveTimeSeriesRegressionForecaster(
    #         regressor=SimpleRNNRegressor(nb_epochs=100), window_length=3)
    # )

    # `ses`, `holt` and `damped` are module-level forecasters; the same
    # objects are handed both to their own entries and to the ensemble.
    forecasters = {}
    forecasters["Naive"] = NaiveForecaster(strategy="last")
    forecasters["sNaive"] = NaiveForecaster(strategy="seasonal_last", sp=sp)
    forecasters["Naive2"] = deseasonalise(
        NaiveForecaster(strategy="last"), **seasonal_kwargs
    )
    forecasters["SES"] = deseasonalise(ses, **seasonal_kwargs)
    forecasters["Holt"] = deseasonalise(holt, **seasonal_kwargs)
    forecasters["Damped"] = deseasonalise(damped, **seasonal_kwargs)
    forecasters["Theta"] = deseasonalise(
        ThetaForecaster(deseasonalise=False), **seasonal_kwargs
    )
    forecasters["ARIMA"] = AutoARIMA(
        suppress_warnings=True, error_action="ignore", sp=sp
    )
    ensemble = EnsembleForecaster(
        [("ses", ses), ("holt", holt), ("damped", damped)]
    )
    forecasters["Com"] = deseasonalise(ensemble, **seasonal_kwargs)
    # "MLP" and "RNN" entries are disabled together with their pipelines.
    forecasters["260"] = theta_bc
    return forecasters
def build_forecaster(self):
    """Instantiate the forecaster selected by ``self.type``.

    Supported values of ``self.type``:

    * ``'auto_arima'`` -> ``AutoARIMA`` with a seasonal period of 7
    * ``'prophet'`` -> ``Prophet`` with default settings
    * ``'deepar'`` -> ``DeepAREstimator`` configured from ``self.horizon``
      and ``self.freq`` and trained on CPU

    Returns
    -------
    The newly created, unfitted forecaster.

    Raises
    ------
    NotImplementedError
        If ``self.type`` is none of the supported values.
    """
    if self.type == 'auto_arima':
        return AutoARIMA(sp=7, suppress_warnings=True)

    if self.type == 'prophet':
        return Prophet()

    if self.type == 'deepar':
        trainer = Trainer(
            ctx="cpu",
            epochs=15,
            learning_rate=1e-3,
            num_batches_per_epoch=100,
        )
        return DeepAREstimator(
            prediction_length=self.horizon,
            freq=self.freq,
            trainer=trainer,
        )

    # Report the requested model name; a bare `type` here would refer to the
    # builtin and hide which model was actually asked for.
    raise NotImplementedError(
        f'Model {self.type} is currently not implemented')
def train_model_autoarima(y, x, output: bool = True) -> AutoARIMA:
    """Fit an AutoARIMA model on ``y`` with ``x`` as exogenous data.

    The first element of ``y`` and the last row of ``x`` are dropped before
    fitting, so each remaining target value is paired with the regressor row
    one position earlier.

    Parameters
    ----------
    y : target values; element 0 is removed with ``np.delete``
    x : regressors; sliced with ``x[:-1]``
    output : when true, log progress, call ``model.summary()`` and log the
        elapsed time

    Returns
    -------
    AutoARIMA
        The fitted model.
    """
    if output:
        logger.info("Training AutoARIMA model...")
        timer = Timer()

    model = AutoARIMA(suppress_warnings=True, error_action='ignore')

    target = pd.Series(data=np.delete(y, 0))
    regressors = pd.DataFrame(data=x[:-1])
    model.fit(target, regressors)

    if not output:
        return model

    model.summary()
    logger.info(f'Done in {timer}')
    return model
def test_auto_arima():
    """Regression test for the bug reported in issue 805.

    https://github.com/alan-turing-institute/sktime/issues/805#issuecomment-891848228.

    For a daily and a two-day date index, AutoARIMA is fitted on the first
    five observations and asked for an absolute horizon covering the last
    three; the prediction index must equal those three dates.
    """
    scenarios = [
        ("1D", "January 6, 2021"),
        ("2D", "January 11, 2021"),
    ]

    for freq, first_forecast_date in scenarios:
        time_index = pd.date_range("January 1, 2021", periods=8, freq=freq)
        X = pd.DataFrame(
            np.random.randint(0, 4, 24).reshape(8, 3),
            columns=["First", "Second", "Third"],
            index=time_index,
        )
        y = pd.Series([1, 3, 2, 4, 5, 2, 3, 1], index=time_index)

        X_future = X[5:]
        horizon = ForecastingHorizon(X_future.index, is_relative=False)

        estimator = AutoARIMA(start_p=2, start_q=2, max_p=5, max_q=5)
        fitted = estimator.fit(X=X[:5], y=y[:5])
        y_pred_sk = fitted.predict(fh=horizon, X=X_future)

        expected_index = pd.date_range(
            first_forecast_date, periods=3, freq=freq
        )
        pd.testing.assert_index_equal(y_pred_sk.index, expected_index)
def main():
    """Compare five forecasters on the airline dataset.

    The last 36 observations are held out. A seasonal naive forecaster,
    AutoARIMA, exponential smoothing, Theta and an ensemble of those four
    are each fitted on the training part and asked to predict the held-out
    horizon. All forecasts are plotted together, then the sMAPE of every
    forecaster is printed as a percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records).
    df = datasets.load_airline()
    # 36 months for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Forecast horizon: steps 1..len(y_test), same length as y_test.
    fh = np.arange(1, len(y_test) + 1)

    # `damped` is passed as a real boolean: any non-empty string, including
    # 'False', would be truthy and silently enable damping.
    ets_kwargs = dict(trend='add', damped=True, seasonal='multiplicative',
                      sp=12)

    # (plot label, report label, forecaster); sp=12 is the yearly season.
    candidates = [
        ('Naive Forecaster', 'NaiveForecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing',
         ExponentialSmoothing(**ets_kwargs)),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing', ExponentialSmoothing(**ets_kwargs)),
             ('Theta', ThetaForecaster(sp=12)),
         ])),
    ]

    predictions = []
    for _, _, forecaster in candidates:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_labels = ['Train', 'Test'] + [label for label, _, _ in candidates]
    plot_ys(y_train, y_test, *predictions, labels=plot_labels)
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (_, report_label, _), y_pred in zip(candidates, predictions):
        # Scale to percent before rounding so the printed value carries no
        # floating-point tail such as 14.499999999999998.
        percent = round(100 * smape_loss(y_test, y_pred), 1)
        print(f'SMAPE Error for {report_label} is:', percent, '%')
# Exponential smoothing with additive trend and additive seasonality
# (seasonal period 12), fitted on y_train and forecast over `fh`.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)
ets_frcstr.fit(y_train)
y_pred = ets_frcstr.predict(fh)
# Plot labels are in Russian; the first one reads "Training". The other two
# are single letters (presumably abbreviations for test and prediction).
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])
# The results of the next three calls are not stored; they only have a
# visible effect when run interactively (e.g. in a notebook cell).
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()
smape_loss(y_test, y_pred)
# NOTE(review): AutoETS is fitted on the forecast `y_pred`, not on
# `y_train` - confirm this is intended.
auto_ets_frr = AutoETS()
auto_ets_frr.fit(y_pred)
auto_ets_frr.summary()
# `arima_frr` is assigned twice in a row: the AutoARIMA instance is replaced
# by a default ARIMA instance before it is ever used.
arima_frr = AutoARIMA()
arima_frr = ARIMA()
# ARIMA(1, 1, 0) with seasonal order (0, 1, 0) and seasonal period 12.
forecaster = ARIMA( order=(1, 1, 0), seasonal_order=(0, 1, 0, 12), suppress_warnings=True )
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """Fit a forecaster for one customer, save it, and plot the result.

    The customer's series is built with ``prepare_data`` and split so that
    the last ``test_size_month`` observations form the test set. The selected
    forecaster is fitted on the training part, asked to predict the test
    period, dumped to disk, and the sMAPE between prediction and test data is
    printed.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp
    customer_id: int
    start: string
        start year and month in '2020-01' format
    end: string
        end year and month in '2020-01' format
        *** this month will not be included ***
    model_type: string
        type of model to use in forecasting, one of
        ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster',
        'KNeighborsRegressor', 'ExponentialSmoothing', 'AutoETS',
        'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']
    test_size_month: int
        number of months excluded from the end of the interval and used as
        the test dataset
    model_storage_path: string
        folder in which the fitted model is stored; the empty default
        stores it in the current working directory

    Returns
    -------
    The value returned by ``plot_series`` for the train, test and predicted
    series.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names. This is checked
        before any data preparation.
    """
    import os

    # Validate the model choice first so a typo fails fast with a clear
    # message instead of an unbound-variable error after data preparation.
    forecaster = _make_forecaster(model_type)

    y_train, y_test = temporal_train_test_split(
        prepare_data(data, customer_id, start=start, end=end),
        test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on the series shifted up by one.
        # NOTE(review): predictions from this fallback are not shifted back
        # down by one - confirm that is intended.
        forecaster.fit(y_train + 1)
    y_pred = forecaster.predict(fh)

    # os.path.join keeps an empty storage path relative; plain string
    # formatting would turn it into a path at the filesystem root.
    model_file = os.path.join(
        model_storage_path,
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model')
    dump(forecaster, model_file)

    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot


def _make_forecaster(model_type):
    """Return a new, unfitted forecaster for ``model_type`` or raise ValueError."""
    # Factories are lazy so only the requested forecaster is constructed.
    factories = {
        'NaiveForecaster':
        lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
        lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
        lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
        lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive"),
        'ExponentialSmoothing':
        lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12),
        'AutoETS':
        lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
        lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
        lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
        lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
        lambda: EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            ("holt",
             ExponentialSmoothing(trend="add",
                                  damped_trend=False,
                                  seasonal="multiplicative",
                                  sp=12)),
            ("damped",
             ExponentialSmoothing(trend="add",
                                  damped_trend=True,
                                  seasonal="multiplicative",
                                  sp=12)),
        ]),
    }
    try:
        factory = factories[model_type]
    except KeyError:
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            f"expected one of {sorted(factories)}") from None
    return factory()
# Plot appearance: wide default figure and the ggplot style.
plt.rcParams["figure.figsize"] = [16, 7]
plt.style.use('ggplot')

# Load the a10 dataset, indexed by its `date` column as monthly periods.
df = pd.read_csv(
    'https://raw.githubusercontent.com/selva86/datasets/master/a10.csv',
    parse_dates=['date'],
    index_col="date",
)
df.index = pd.PeriodIndex(df.index, freq="M")
# First row of the transposed frame, i.e. the first data column as a series.
series = df.T.iloc[0]
plot_series(series)

# Automatic ARIMA search with a seasonal period of 12 on the whole series.
model_auto = AutoARIMA(sp=12, suppress_warnings=True).fit(series)
summary = model_auto.summary()


def get_params(summary_text):
    """Return the integers found in the first ``SARIMAX(...)x(...)`` header.

    Raises ``IndexError`` when ``summary_text`` contains no such header.
    """
    header = re.findall(r'SARIMAX\(.*?\)x\(.*?\)', summary_text)[0]
    return list(map(int, re.findall(r'\d+', header)))


# Seven values are expected: three order terms and four seasonal-order terms.
p, d, q, P, D, Q, S = get_params(summary.as_text())

# Hold out the last 24 observations and forecast exactly their index.
y_train, y_test = temporal_train_test_split(series, test_size=24)
fh = ForecastingHorizon(y_test.index, is_relative=False)
sp=12)  # closes a call that begins before this fragment
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))
# Page text (Chinese): introduces ARIMA as another common model and says
# sktime interfaces pmdarima for automatic ARIMA selection, which can take
# longer because many candidate parameters are searched.
st.write(''' 另一个常见模型是ARIMA模型。 在sktime中,我们连接pmdarima,这是一个用于自动选择最佳ARIMA模型的软件包。 这是因为搜索了许多可能的模型参数,因此可能需要更长的时间。 ''')
from sktime.forecasting.arima import AutoARIMA
# AutoARIMA with a seasonal period of 12: fit, forecast, plot and report
# sMAPE, mirroring the steps used for the previous forecaster.
forecaster = AutoARIMA(sp=12, suppress_warnings=True)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))
# Page text (Chinese): heading for section 4.3 on composite model building;
# says sktime offers a modular API for composing forecasters and, like
# scikit-learn, a meta-forecaster for ensembling, e.g. combining variants of
# exponential smoothing.
st.write(''' ### 4.3 Compositite model building sktime提供了用于组合模型构建的模块化API,以进行预测。 * Ensembling 像scikit-learn一样,sktime提供了一个元预测器来集成多种预测算法。 例如,我们可以如下组合指数平滑的不同变体: ''')
from sklearn.ensemble import RandomForestRegressor
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
# Forecasting by reduction: a random-forest regressor wrapped in
# ReducedRegressionForecaster with a window length of 12.
regressor = RandomForestRegressor()
forecaster = ReducedRegressionForecaster(regressor, window_length=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=['y_train', 'y_test', 'y_pred'])
# The result is not stored; it is only shown when run interactively.
smape_loss(y_test, y_pred)
# The bare string below is a section title kept as an expression statement.
"""Forcasting with autoarima"""
from sktime.forecasting.arima import AutoARIMA
# AutoARIMA with a seasonal period of 12 on the same train/test split.
forecaster = AutoARIMA(sp=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]);
smape_loss(y_test, y_pred)
# The bare string below is a section title kept as an expression statement.
"""Time Series Classification"""
from sktime.datasets import load_arrow_head
from sktime.classification.compose import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X, y = load_arrow_head(return_X_y=True)
# From here on `y_train` and `y_test` hold the classification split and no
# longer the forecasting series used above.
X_train, X_test, y_train, y_test = train_test_split(X, y)