def test_weights_for_airline_averaging():
    """Check that fitting alone leaves the ensemble weights uniform.

    Three multiplicative-seasonal ``ExponentialSmoothing`` variants are
    combined in an ``OnlineEnsembleForecaster`` without an explicit ensemble
    algorithm. After ``fit`` on the airline training split every weight is
    expected to equal 1/3.
    """
    airline = load_airline()
    train, _ = temporal_train_test_split(airline)

    ses = ExponentialSmoothing(seasonal="multiplicative", sp=12)
    holt = ExponentialSmoothing(
        trend="add", damped_trend=False, seasonal="multiplicative", sp=12
    )
    damped = ExponentialSmoothing(
        trend="add", damped_trend=True, seasonal="multiplicative", sp=12
    )
    ensemble = OnlineEnsembleForecaster(
        [("ses", ses), ("holt", holt), ("damped_trend", damped)]
    )
    ensemble.fit(train)

    uniform_weights = np.array([1 / 3, 1 / 3, 1 / 3])
    np.testing.assert_allclose(ensemble.weights, uniform_weights, rtol=1e-8)
def load_dataset():
    """Load the airline series together with 12-period rolling statistics.

    Returns
    -------
    tuple
        ``(y, df, rolling_mean, rolling_std)`` where ``y`` is the object
        returned by ``load_airline()``, ``df`` is a ``pd.DataFrame`` built
        from it whose index was converted with ``to_timestamp()``, and the
        last two entries are the ``window=12`` rolling mean and rolling
        standard deviation of ``df``.
    """
    series = load_airline()

    frame = pd.DataFrame(series)
    frame.index = frame.index.to_timestamp()

    yearly_mean = frame.rolling(window=12).mean()
    yearly_std = frame.rolling(window=12).std()
    return (series, frame, yearly_mean, yearly_std)
def test_pred_errors_against_y_test(fh):
    """Check prediction performance on airline dataset.

    The held-out observations at ``fh`` are compared against every column
    returned by ``predict_interval`` for ``coverage=[0.1]``.

    Arguments
    ---------
    fh: ForecastingHorizon, fh at which to test prediction

    Raises
    ------
    AssertionError - if the test observations violate the bound check
        applied to any column of the prediction intervals
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh=fh)
    intervals = forecaster.predict_interval(fh=fh, coverage=[0.1])

    # keep only the test observations that correspond to the requested horizon
    y_test = y_test.iloc[check_fh(fh) - 1]

    # Performance should be good enough that all observations respect the
    # interval bounds.
    for column in intervals:
        bound = intervals[column].values
        # the second entry of the column key decides the comparison direction
        if column[1] < 0.5:
            assert np.all(y_test > bound)
        else:
            assert np.all(y_test <= bound)
def test_boxcox_transform():
    """Check the adapted box-cox ``PowerTransformer`` against ``boxcox``.

    ``PowerTransformer(method="box-cox", standardize=False)`` is wrapped in a
    ``TabularToSeriesAdaptor`` and applied to the airline series; the output
    must equal the first return value of ``boxcox`` on the same values.
    """
    y = load_airline()

    power = PowerTransformer(method="box-cox", standardize=False)
    adaptor = TabularToSeriesAdaptor(power)
    transformed = adaptor.fit_transform(y)

    # boxcox returns the fitted lambda as second output; it is not needed here
    reference, _ = boxcox(np.asarray(y))

    np.testing.assert_array_equal(transformed, reference)
def test_theta_1():
    """With theta = 1 the Theta-line must equal the original time series."""
    series = load_airline()

    transformer = ThetaLinesTransformer(1)
    transformer.fit(series)
    theta_line = transformer.transform(series)

    np.testing.assert_array_equal(theta_line, series)
def test_gscv_fit(forecaster, param_dict, cv, scoring):
    """Check scores and selected parameters of ``ForecastingGridSearchCV``.

    Parameters
    ----------
    forecaster : forecaster handed to the grid search
    param_dict : parameter grid specification, also expanded with
        ``ParameterGrid`` to compute the reference scores
    cv : splitter handed to the grid search
    scoring : metric object; its ``name`` attribute selects the
        ``mean_test_<name>`` entry of ``cv_results_``

    Raises
    ------
    AssertionError - if the scores differ from the reference scores, if
        ``best_params_`` is not the candidate with the smallest score, or if
        the best forecaster does not carry ``best_params_``
    """
    y = load_airline()
    candidates = ParameterGrid(param_dict)

    search = ForecastingGridSearchCV(
        forecaster, param_grid=param_dict, cv=cv, scoring=scoring
    )
    search.fit(y)

    # check scores
    observed_scores = search.cv_results_[f"mean_test_{scoring.name}"]
    reference_scores = compute_expected_gscv_scores(
        forecaster, cv, candidates, y, scoring
    )
    np.testing.assert_array_equal(observed_scores, reference_scores)

    # check best parameters
    assert search.best_params_ == candidates[observed_scores.argmin()]

    # check best forecaster is the one with best parameters
    fitted_params = search.best_forecaster_.get_params()
    selected = {}
    for name, value in fitted_params.items():
        if name in search.best_params_.keys():
            selected[name] = value
    assert selected == search.best_params_
def test_pipeline():
    """Test results of TransformedTargetForecaster.

    The pipeline prediction is compared with the result of fitting the same
    transformers and forecaster by hand and undoing the transforms in
    reverse order.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)
    fh = np.arange(1, len(y_test) + 1)

    pipeline = TransformedTargetForecaster(
        [
            ("t1", ExponentTransformer()),
            ("t2", TabularToSeriesAdaptor(MinMaxScaler())),
            ("forecaster", NaiveForecaster()),
        ]
    )
    pipeline.fit(y_train, fh=fh)
    actual = pipeline.predict()

    def manual_prediction(train, horizon):
        # fitting: apply each stage by hand, in pipeline order
        exponent = ExponentTransformer()
        transformed = exponent.fit_transform(train.copy())
        scaler = TabularToSeriesAdaptor(MinMaxScaler())
        transformed = scaler.fit_transform(transformed)
        naive = NaiveForecaster()
        naive.fit(transformed, fh=horizon)

        # predicting: undo the transforms last-to-first
        prediction = naive.predict()
        prediction = scaler.inverse_transform(prediction)
        return exponent.inverse_transform(prediction)

    expected = manual_prediction(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
def test_pipeline():
    """Test TransformedTargetForecaster with deseasonalizing and detrending.

    The pipeline prediction is compared with the result of fitting a
    ``Deseasonalizer``, a ``Detrender`` and a ``NaiveForecaster`` by hand and
    undoing the transforms in reverse order.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)
    fh = np.arange(1, len(y_test) + 1)

    pipeline = TransformedTargetForecaster(
        [
            ("t1", Deseasonalizer(sp=12, model="multiplicative")),
            ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
            ("forecaster", NaiveForecaster()),
        ]
    )
    pipeline.fit(y_train, fh=fh)
    actual = pipeline.predict()

    def manual_prediction(train, horizon):
        # fitting: apply each stage by hand, in pipeline order
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        transformed = deseasonalizer.fit_transform(train.copy())
        detrender = Detrender(PolynomialTrendForecaster(degree=1))
        transformed = detrender.fit_transform(transformed)
        naive = NaiveForecaster()
        naive.fit(transformed, fh=horizon)

        # predicting: undo the transforms last-to-first
        prediction = naive.predict()
        prediction = detrender.inverse_transform(prediction)
        return deseasonalizer.inverse_transform(prediction)

    expected = manual_prediction(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
def check_trend(degree, with_intercept):
    """Helper function to check trend.

    Fits a ``PolynomialTrendForecaster`` on the airline series and compares
    the coefficients of the last step of its fitted ``regressor_`` with the
    output of ``compute_expected_coefs``.

    Parameters
    ----------
    degree : passed to ``PolynomialTrendForecaster`` and
        ``compute_expected_coefs``
    with_intercept : passed to ``PolynomialTrendForecaster`` and
        ``compute_expected_coefs``

    Raises
    ------
    AssertionError - if the two coefficient arrays are not close
    """
    y = load_airline()

    trend_forecaster = PolynomialTrendForecaster(
        degree=degree, with_intercept=with_intercept
    )
    trend_forecaster.fit(y)

    final_estimator = trend_forecaster.regressor_.steps[-1][1]
    # intercept is added in reverse order
    fitted_coefs = final_estimator.coef_[::-1]

    reference_coefs = compute_expected_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(fitted_coefs, reference_coefs)
def test_linear_detrending():
    """Check degree-1 detrending against ``compute_expected_detrend``."""
    y = load_airline()

    linear_trend = PolynomialTrendForecaster(degree=1, with_intercept=True)
    detrender = Detrender(linear_trend)
    detrended = detrender.fit_transform(y)

    reference = compute_expected_detrend(y, 1, with_intercept=True)
    np.testing.assert_allclose(detrended, reference)
def test_boxcox_against_scipy():
    """Check ``BoxCoxTransformer`` output and lambda against ``boxcox``.

    Both the transformed values and the fitted ``lambda_`` must equal what
    ``boxcox`` returns for the raw airline values.
    """
    y = load_airline()

    transformer = BoxCoxTransformer()
    transformed = transformer.fit_transform(y)

    reference_values, reference_lambda = boxcox(y.values)

    np.testing.assert_array_equal(transformed, reference_values)
    assert transformer.lambda_ == reference_lambda
def test_forecaster_with_initial_level():
    """Check ``ThetaForecaster(initial_level=0.1, sp=12)`` on log1p airline data.

    Raises
    ------
    AssertionError - if the forecasts deviate from the test data by more
        than a relative tolerance of 0.05
    """
    log_series = np.log1p(load_airline())
    train, test = temporal_train_test_split(log_series)
    horizon = np.arange(1, len(test) + 1)

    forecaster = ThetaForecaster(initial_level=0.1, sp=12)
    forecaster.fit(train)
    forecast = forecaster.predict(fh=horizon)

    np.testing.assert_allclose(forecast, test, rtol=0.05)
def test_predictive_performance_on_airline():
    """Check ``ThetaForecaster(sp=12)`` forecasts on log1p airline data.

    Raises
    ------
    AssertionError - if the forecasts deviate from the test data by more
        than a relative tolerance of 0.05
    """
    log_series = np.log1p(load_airline())
    train, test = temporal_train_test_split(log_series)
    horizon = np.arange(1, len(test) + 1)

    forecaster = ThetaForecaster(sp=12)
    forecaster.fit(train)
    forecast = forecaster.predict(fh=horizon)

    # Performance on this particular dataset should be reasonably good.
    np.testing.assert_allclose(forecast, test, rtol=0.05)
def test_theta_0():
    """With theta = 0 the Theta-line must match a linear regression fit.

    The reference is ``intercept + slope * x`` from ``linregress`` with
    ``x = 1, ..., y.size``; equality is checked to 8 decimals.
    """
    series = load_airline()

    transformer = ThetaLinesTransformer(0)
    transformer.fit(series)
    theta_line = transformer.transform(series)

    time_index = np.arange(1, series.size + 1)
    fit = linregress(time_index, series)
    regression_line = fit.intercept + fit.slope * time_index

    np.testing.assert_almost_equal(theta_line, regression_line, decimal=8)
def test_evaluate():
    """Smoke-test ``evaluate`` with an expanding window and update strategy.

    Only checks that the call completes and returns a ``pd.DataFrame``.
    """
    y = load_airline()

    splitter = ExpandingWindowSplitter(
        initial_window=24,
        step_length=24,
        fh=list(range(1, 13)),
        window_length=10,
    )
    drift_forecaster = NaiveForecaster(strategy="drift", sp=12)

    results = evaluate(
        forecaster=drift_forecaster, y=y, cv=splitter, strategy="update"
    )

    # just making sure the function is running
    assert isinstance(results, pd.DataFrame)
def test_factory_method_direct():
    """Check ``ReducedForecaster`` (regressor, direct) against the direct class.

    Both forecasters wrap the same ``LinearRegression`` instance and must
    produce identical predictions for the last 24 airline observations.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    regressor = LinearRegression()
    via_factory = ReducedForecaster(
        regressor, scitype="regressor", strategy="direct"
    )
    via_class = DirectRegressionForecaster(regressor)

    actual = via_factory.fit(y_train, fh=horizon).predict(horizon)
    expected = via_class.fit(y_train, fh=horizon).predict(horizon)

    np.testing.assert_array_equal(actual, expected)
def test_pred_errors_against_y_test(fh):
    """Check test observations lie within point forecast +/- prediction error.

    Parameters
    ----------
    fh : forecasting horizon passed to ``ThetaForecaster.fit`` and used to
        select the matching test observations

    Raises
    ------
    AssertionError - if any test observation lies outside
        ``y_pred - error`` / ``y_pred + error`` for ``alpha=0.1``
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    y_pred = forecaster.predict(return_pred_int=False)

    raw_errors = forecaster._compute_pred_errors(alpha=0.1)
    # a single Series is wrapped so the loop below works in both cases
    error_list = [raw_errors] if isinstance(raw_errors, pd.Series) else raw_errors

    y_test = y_test.iloc[check_fh(fh) - 1]

    for margin in error_list:
        assert np.all(y_test > y_pred - margin)
        assert np.all(y_test < y_pred + margin)
def test_reductions_airline_data(forecaster, expected):
    """Test reduction forecasters on the airline dataset.

    Predictions made with linear estimators are compared with values
    calculated by Lovkush Agarwal on their local machine in Mar 2021.

    Parameters
    ----------
    forecaster : reduction forecaster to fit on the training split
    expected : reference predictions for the last 24 observations
    """
    airline = load_airline()
    train, test = temporal_train_test_split(airline, test_size=24)
    horizon = ForecastingHorizon(test.index, is_relative=False)

    fitted = forecaster.fit(train, fh=horizon)
    actual = fitted.predict(horizon)

    np.testing.assert_almost_equal(actual, expected)
def test_factory_method_ts_direct():
    """Check ``ReducedForecaster`` (ts_regressor, direct) against the direct class.

    Both forecasters wrap the same ``Tabularizer`` + ``LinearRegression``
    pipeline and must produce identical predictions for the last 24 airline
    observations.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    ts_regressor = Pipeline(
        [("tabularize", Tabularizer()), ("model", LinearRegression())]
    )
    via_factory = ReducedForecaster(
        ts_regressor, scitype="ts_regressor", strategy="direct"
    )
    via_class = DirectTimeSeriesRegressionForecaster(ts_regressor)

    actual = via_factory.fit(y_train, fh=horizon).predict(horizon)
    expected = via_class.fit(y_train, fh=horizon).predict(horizon)

    np.testing.assert_array_equal(actual, expected)
def test_guerrero_against_r_implementation(bounds, r_lambda):
    """Compare the guerrero lambda of BoxCoxTransformer with the R result.

    The reference lambda values were estimated by the R implementation of
    the Guerrero method
    https://github.com/robjhyndman/forecast/blob/master/R/guerrero.R

    R code to generate the hardcoded value for bounds=(-1, 2) used in the
    test ('Airline.csv' contains the data from 'load_airline()'):

        airline_file <- read.csv(file = 'Airline.csv')[,c('Passengers')]
        airline.ts <- ts(airline_file)
        guerrero(airline.ts, lower=-1, upper=2, nonseasonal.length = 20)

    Output:
        -0.156981228426408

    Parameters
    ----------
    bounds : bounds passed to ``BoxCoxTransformer``
    r_lambda : lambda value obtained from R, compared to 4 decimals
    """
    airline = load_airline()

    transformer = BoxCoxTransformer(bounds=bounds, method="guerrero", sp=20)
    transformer.fit(airline)

    np.testing.assert_almost_equal(transformer.lambda_, r_lambda, decimal=4)
def test_skip_inverse_transform():
    """Test transformers with skip-inverse-transform tag in pipeline.

    A NaN and an outlier are injected into the airline series before a
    ``HampelFilter`` -> ``Imputer`` -> ``NaiveForecaster`` pipeline is fitted;
    the prediction only has to be a ``pd.Series``.
    """
    y = load_airline()

    # add nan and outlier
    y.iloc[3] = np.nan
    y.iloc[4] *= 20

    y_train, y_test = temporal_train_test_split(y)
    horizon = np.arange(1, len(y_test) + 1)

    steps = [
        ("t1", HampelFilter(window_length=12)),
        ("t2", Imputer(method="mean")),
        ("forecaster", NaiveForecaster()),
    ]
    pipeline = TransformedTargetForecaster(steps)
    pipeline.fit(y_train, fh=horizon)

    prediction = pipeline.predict()
    assert isinstance(prediction, pd.Series)
def test_results_consistency(levels=levels):
    """Check consistency between wrapper and statsmodels original implementation.

    Parameters
    ----------
    levels : iterable of ``level`` values passed to both
        ``UnobservedComponents`` and ``_UnobservedComponents``; defaults to
        the module-level ``levels``

    Raises
    ------
    AssertionError - if the wrapper forecast differs from the forecast of
        the underlying model, or does not have one entry per horizon step
    """
    y = load_airline()

    for n_steps in (3, 5, 10):
        horizon = np.arange(1, n_steps + 1)
        for level in levels:
            # Fit and predict with forecaster.
            wrapper = UnobservedComponents(level=level)
            wrapper.fit(y)
            wrapper_forecast = wrapper.predict(fh=horizon)

            # Fit train statsmodels original model.
            base_model = _UnobservedComponents(level=level, endog=y)
            base_result = base_model.fit(disp=0)
            base_forecast = base_result.forecast(steps=n_steps)

            assert_series_equal(left=wrapper_forecast, right=base_forecast)
            assert len(horizon) == wrapper_forecast.shape[0]
def test_multioutput_direct_tabular():
    """Check multioutput and direct reduction give the same predictions.

    With a linear regression regressor both strategies should produce the
    same predictions (compared to 5 decimals).
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    regressor = LinearRegression()
    multioutput = MultioutputRegressionForecaster(regressor)
    direct = DirectRegressionForecaster(regressor)

    multioutput_preds = multioutput.fit(y_train, fh=horizon).predict(horizon)
    direct_preds = direct.fit(y_train, fh=horizon).predict(horizon)

    # assert_almost_equal does not seem to work with pd.Series objects
    np.testing.assert_almost_equal(
        multioutput_preds.to_numpy(), direct_preds.to_numpy(), decimal=5
    )
def test_pred_errors_against_y_test(fh):
    """Check test observations lie strictly inside the prediction intervals.

    Parameters
    ----------
    fh : forecasting horizon passed to ``ThetaForecaster.fit`` and used to
        select the matching test observations

    Raises
    ------
    AssertionError - if any test observation is not strictly between the
        ``"lower"`` and ``"upper"`` entries returned by
        ``compute_pred_int(y_pred, [0.1])``
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    point_forecast = forecaster.predict(return_pred_int=False)
    intervals = forecaster.compute_pred_int(point_forecast, [0.1])

    y_test = y_test.iloc[check_fh(fh) - 1]

    # Performance should be good enough that all test observations lie
    # within the prediction intervals.
    for interval in intervals:
        assert np.all(y_test > interval["lower"])
        assert np.all(y_test < interval["upper"])
def test_forecaster_with_initial_level():
    """Check prediction performance on airline dataset.

    A ``ThetaForecaster(initial_level=0.1, sp=12)`` is fitted on the log1p
    transformed training split. Performance on this dataset should be
    reasonably good.

    Raises
    ------
    AssertionError - if point forecasts do not lie close to the test data
        (relative tolerance 0.05)
    """
    log_series = np.log1p(load_airline())
    train, test = temporal_train_test_split(log_series)
    horizon = np.arange(1, len(test) + 1)

    forecaster = ThetaForecaster(initial_level=0.1, sp=12)
    forecaster.fit(train)
    forecast = forecaster.predict(fh=horizon)

    np.testing.assert_allclose(forecast, test, rtol=0.05)
def test_regressor_forecasting(
    regressor=MLPRegressor(nb_epochs=SMALL_NB_EPOCHS), window_length=4
):
    """Test a regressor used for forecasting.

    The regressor is wrapped in a ``RecursiveTimeSeriesRegressionForecaster``,
    fitted on the first ``2 * window_length`` airline training observations
    and used to predict the 5 held-out observations.

    Parameters
    ----------
    regressor : estimator handed to the forecaster; for an
        ``MCDCNNRegressor`` its ``nb_epochs`` attribute is doubled in place
    window_length : int, default=4
        window length of the forecaster

    Raises
    ------
    ValueError - re-raised if computing the error fails for any regressor
        other than ``MCDCNNRegressor``
    """
    print("Start test_regressor_forecasting()")

    is_mcdcnn = isinstance(regressor, MCDCNNRegressor)
    if is_mcdcnn:
        regressor.nb_epochs *= 2

    # load univariate time series data
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=5)
    y_train = y_train[: window_length * 2]

    # specify forecasting horizon
    horizon = np.arange(1, len(y_test) + 1)

    # solve forecasting task via reduction to time series regression
    forecaster = RecursiveTimeSeriesRegressionForecaster(
        estimator=regressor, window_length=window_length
    )
    forecaster.fit(y_train)
    y_pred = forecaster.predict(horizon)

    try:
        # square root of the mean squared error
        error = np.sqrt(mean_squared_error(y_test, y_pred))
        print("Error:", error)
    except ValueError:
        if not is_mcdcnn:
            # unexpected error in all other cases
            raise
        print(
            "Warning: MCDCNNRegressor produced NaN predictions. This is a "
            "known problem brought about by insufficient data/learning. "
            "For now, we accept that this particular network produced "
            "predictions at all (even NaNs) as passing for this "
            "particular test. Providing more data/epochs risks slowing "
            "down tests too much."
        )

    print("End test_regressor_forecasting()")
def test_dirrec_against_recursive_accumulated_error():
    """Check dirrec reduction beats recursive reduction on sMAPE.

    The dirrec strategy should produce a lower error than the recursive
    strategy due to less cumulative error.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    estimator = LinearRegression()
    recursive = make_reduction(
        estimator, scitype="tabular-regressor", strategy="recursive"
    )
    dirrec = make_reduction(
        estimator, scitype="tabular-regressor", strategy="dirrec"
    )

    recursive_preds = recursive.fit(y_train, fh=horizon).predict(horizon)
    dirrec_preds = dirrec.fit(y_train, fh=horizon).predict(horizon)

    assert smape_loss(y_test, dirrec_preds) < smape_loss(y_test, recursive_preds)
def test_weights_for_airline_nnls():
    """Check ``NNLSEnsemble`` weights after updating on the airline test split.

    Three mean ``NaiveForecaster`` instances (window lengths 5, 10, 20) are
    ensembled; after ``fit`` and ``update_predict`` the weights must match
    the hard-coded reference values.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    members = [
        (f"av{window}", NaiveForecaster(strategy="mean", window_length=window))
        for window in (5, 10, 20)
    ]
    nnls = NNLSEnsemble(n_estimators=3, loss_func=mean_squared_error)
    ensemble = OnlineEnsembleForecaster(members, ensemble_algorithm=nnls)

    ensemble.fit(y_train)
    ensemble.update_predict(y_test)

    reference_weights = np.array([0.04720766, 0, 1.03410876])
    np.testing.assert_allclose(ensemble.weights, reference_weights, atol=1e-8)
def test_weights_for_airline_normal_hedge():
    """Test weights of ``NormalHedgeEnsemble`` after updating on test data.

    Three mean ``NaiveForecaster`` instances (window lengths 5, 10, 20) are
    ensembled; after ``fit`` and ``update_predict`` the weights must match
    the hard-coded reference values.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    members = [
        (f"av{window}", NaiveForecaster(strategy="mean", window_length=window))
        for window in (5, 10, 20)
    ]
    hedge = NormalHedgeEnsemble(n_estimators=3, loss_func=mean_squared_error)
    ensemble = OnlineEnsembleForecaster(members, ensemble_algorithm=hedge)

    ensemble.fit(y_train)
    # ``cv`` is not defined in this function; it is resolved from the
    # enclosing module scope
    ensemble.update_predict(y=y_test, cv=cv, reset_forecaster=False)

    reference_weights = np.array([0.17077154, 0.48156709, 0.34766137])
    np.testing.assert_allclose(ensemble.weights, reference_weights, atol=1e-8)
def test_rscv_fit(forecaster, param_dict, cv, scoring, n_iter, random_state):
    """Check scores and selected parameters of ForecastingRandomizedSearchCV.

    Tests that ``ForecastingRandomizedSearchCV`` successfully searches the
    parameter distributions to identify the best parameter set.

    Parameters
    ----------
    forecaster : forecaster handed to the randomized search
    param_dict : parameter distributions, also sampled with
        ``ParameterSampler`` to compute the reference scores
    cv : splitter handed to the randomized search
    scoring : metric object; its ``name`` attribute selects the
        ``mean_test_<name>`` entry of ``cv_results_``
    n_iter : number of sampled candidates
    random_state : seed shared by the search and the reference sampler

    Raises
    ------
    AssertionError - if the scores differ from the reference scores, if
        ``best_params_`` is not the sampled candidate with the smallest
        score, or if the best forecaster does not carry ``best_params_``
    """
    # samples uniformly from param dict values
    param_distributions = ParameterSampler(
        param_dict, n_iter, random_state=random_state
    )

    y = load_airline()
    rscv = ForecastingRandomizedSearchCV(
        forecaster,
        param_distributions=param_dict,
        cv=cv,
        scoring=scoring,
        n_iter=n_iter,
        random_state=random_state,
    )
    rscv.fit(y)

    # Materialize the sampler exactly once and reuse the list below. Iterating
    # a ParameterSampler a second time is only guaranteed to reproduce the
    # same candidates for an integer seed, so the score check and the
    # best-parameter check must share one list to stay consistent.
    sampled_params = list(param_distributions)

    # check scores
    rscv_scores = rscv.cv_results_[f"mean_test_{scoring.name}"]
    expected_scores = compute_expected_gscv_scores(
        forecaster, cv, sampled_params, y, scoring
    )
    np.testing.assert_array_equal(rscv_scores, expected_scores)

    # check best parameters
    assert rscv.best_params_ == sampled_params[rscv_scores.argmin()]

    # check best forecaster is the one with best parameters
    best_params = rscv.best_params_
    fitted_params = rscv.best_forecaster_.get_params()
    assert {
        key: value for key, value in fitted_params.items() if key in best_params
    } == best_params