def test_fit_predict(daily_data):
    """Checks fit/predict output, forecast accuracy and prediction caching.

    Verifies that:

        - the cache attributes are empty before and right after ``fit``,
        - ``predict`` returns the expected columns and fills the cache,
        - forecast error metrics on the test set stay within fixed bounds,
        - a repeated ``predict`` on the same data logs the cache hit,
        - ``predict`` on other data replaces the cache, and ``fit`` clears it.
    """
    model = AutoArimaEstimator()
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]

    # Nothing is cached on a new estimator
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None

    # Fitting alone does not populate the cache
    model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None

    expected_columns = [
        TIME_COL,
        PREDICTED_COL,
        PREDICTED_LOWER_COL,
        PREDICTED_UPPER_COL,
    ]
    with LogCapture(LOGGER_NAME) as log_capture:
        predicted = model.predict(test_df)
        assert list(predicted.columns) == expected_columns
        assert_equal(model.last_predicted_X_, test_df)
        assert_equal(model.cached_predictions_, predicted)
        log_capture.check()  # no log messages (not using cached predictions)

    # Forecast accuracy on the test set
    errors = calc_pred_err(test_df[VALUE_COL], predicted[PREDICTED_COL])
    assert errors[EvaluationMetricEnum.Correlation.get_metric_name()] > 0.50
    upper_bounds = (
        (EvaluationMetricEnum.MeanAbsoluteError, 2.5),
        (EvaluationMetricEnum.RootMeanSquaredError, 3.0),
        (EvaluationMetricEnum.MedianAbsoluteError, 3.0),
    )
    for metric, bound in upper_bounds:
        assert errors[metric.get_metric_name()] < bound

    # Uses cached predictions
    with LogCapture(LOGGER_NAME) as log_capture:
        assert_equal(model.predict(test_df), predicted)
        log_capture.check((
            LOGGER_NAME,
            LoggingLevelEnum.DEBUG.name,
            "Returning cached predictions."))

    # Predicts on a different dataset
    with LogCapture(LOGGER_NAME) as log_capture:
        predicted = model.predict(train_df)
        assert_equal(model.last_predicted_X_, train_df)
        assert_equal(model.cached_predictions_, predicted)
        log_capture.check()  # no log messages (not using cached predictions)

    # .fit() clears the cached result
    model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None
def test_predict_interaction(daily_data):
    """Tests interaction between predict date and parameter `d`.

    Arima can not predict below `d`, so rows of the prediction input that
    fall before position `d` of the training data are expected to come back
    as NaN in the predicted, lower and upper columns. Later rows, including
    rows past the end of the training data, must be fully populated.
    """
    model = AutoArimaEstimator(d=10)
    df = daily_data["df"]
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]
    model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)

    # Columns that hold the point forecast and its interval bounds
    forecast_cols = [PREDICTED_COL, PREDICTED_LOWER_COL, PREDICTED_UPPER_COL]

    # predict start date < d
    # Predicted, lower and upper CI values of the first 4 (10-6) days should be NaN
    predicted = model.predict(df[6:])
    assert predicted[forecast_cols][0:4].isnull().values.all()
    # NOTE(review): row position 4 is covered by neither this assertion nor
    # the one above -- confirm whether that gap is intentional.
    assert not predicted[forecast_cols][5:10].isnull().values.any()

    # predict start date > d
    # Predicted, lower and upper CI values should not be NaN
    predicted = model.predict(df[12:])
    assert not predicted[forecast_cols][0:4].isnull().values.any()

    # predict start date > train end date
    # Predicted, lower and upper CI values should not be NaN
    predicted = model.predict(test_df[5:])
    assert not predicted[forecast_cols][0:4].isnull().values.any()
def test_score_function(daily_data):
    """Checks that ``score`` on the test set stays below fixed thresholds.

    The estimator is fit and scored twice: once configured with a null
    model (``"mean"`` strategy) and once without any null model.
    """
    time_col = "ts"
    value_col = "y"
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]

    # with null model
    with_null = AutoArimaEstimator(null_model_params={"strategy": "mean"})
    with_null.fit(train_df, time_col=time_col, value_col=value_col)
    assert with_null.score(test_df, test_df[value_col]) < 0.40

    # without null model
    without_null = AutoArimaEstimator()
    without_null.fit(train_df, time_col=time_col, value_col=value_col)
    assert without_null.score(test_df, test_df[value_col]) < 8.0
def test_summary(daily_data):
    """Checks that ``summary`` runs without error on a fitted estimator."""
    estimator = AutoArimaEstimator()
    estimator.fit(daily_data["train_df"], time_col="ts", value_col="y")
    # Only verifies that the call completes; the output is not inspected
    estimator.summary()
def test_property():
    """Tests the properties and defaults exposed by ``AutoArimaTemplate``."""
    # Each capability is checked on its own freshly built template
    assert AutoArimaTemplate().allow_model_template_list is False
    assert AutoArimaTemplate().allow_model_components_param_list is False
    assert AutoArimaTemplate().get_regressor_cols() is None

    # Defaults of a template created without arguments
    default_template = AutoArimaTemplate()
    assert default_template.DEFAULT_MODEL_TEMPLATE == "AUTO_ARIMA"
    assert isinstance(default_template.estimator, AutoArimaEstimator)
    assert default_template.estimator.coverage == 0.90
    default_config = default_template.apply_forecast_config_defaults()
    assert default_config.model_template == "AUTO_ARIMA"

    # An estimator passed in explicitly is kept as the same object
    custom_estimator = AutoArimaEstimator(coverage=0.99)
    custom_template = AutoArimaTemplate(estimator=custom_estimator)
    assert custom_template.estimator is custom_estimator
def test_null_model(X):
    """Checks the null model's score, with the default and a custom score function.

    In both cases the null model is configured with the ``"quantile"``
    strategy at quantile 0.8, and its score against a constant target of 2.0
    is compared with the score of a constant prediction of 9.0.
    """

    def quantile_null_params():
        # A new dict per estimator so the two models share no state
        return {"strategy": "quantile", "constant": None, "quantile": 0.8}

    # Default score function
    default_model = AutoArimaEstimator(
        null_model_params=quantile_null_params())
    default_model.fit(X)
    n_rows = X.shape[0]
    actual = np.repeat(2.0, n_rows)
    expected_prediction = np.repeat(9.0, n_rows)
    score = default_model.null_model.score(X, y=actual)
    assert score == mean_squared_error(actual, expected_prediction)

    # tests if different score function gets propagated to null model
    mae_model = AutoArimaEstimator(
        score_func=mean_absolute_error,
        null_model_params=quantile_null_params())
    mae_model.fit(X)
    n_rows = X.shape[0]
    actual = np.repeat(2.0, n_rows)
    expected_prediction = np.repeat(9.0, n_rows)
    score = mae_model.null_model.score(X, y=actual)
    assert score == mean_absolute_error(actual, expected_prediction)
def test_forecast_via_arima_freq(params):
    """Checks fit, predict and summary on data generated at several frequencies.

    For each of the frequencies ``"H"``, ``"D"`` and ``"M"``, a 50-period
    dataset is generated, the estimator is fit on the training part and used
    to predict the test part. The prediction must have the expected columns
    and the same time column as the test data.
    """
    expected_columns = [
        TIME_COL,
        PREDICTED_COL,
        PREDICTED_LOWER_COL,
        PREDICTED_UPPER_COL,
    ]
    for freq in ("H", "D", "M"):
        data = generate_df_for_tests(freq=freq, periods=50)
        train_df, test_df = data["train_df"], data["test_df"]

        # tests model fit and predict work without error
        model = AutoArimaEstimator(**params)
        try:
            model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)
            pred = model.predict(test_df)
        except Exception:
            # Report which frequency failed, then let the error propagate
            print(f"Failed for frequency {freq}")
            raise

        assert list(pred.columns) == expected_columns
        assert pred[TIME_COL].equals(test_df[TIME_COL])
        model.summary()
def test_arima_setup(params, X):
    """Checks if parameters are passed to Auto-Arima correctly.

    Also checks that ``set_params`` on a default estimator reproduces the
    state of an estimator built with the same arguments, and that every
    listed attribute of the fitted underlying model equals the corresponding
    attribute of an ``AutoARIMA`` built directly from ``params``.
    """
    coverage = 0.99
    init_kwargs = dict(
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params)
    model = AutoArimaEstimator(**init_kwargs)

    # set_params must be able to replicate the init
    model2 = AutoArimaEstimator()
    model2.set_params(**init_kwargs)
    assert model2.__dict__ == model.__dict__

    model.fit(X)
    direct_model = AutoARIMA(**params)
    fitted_attrs = model.model.__dict__
    direct_attrs = direct_model.__dict__

    # Attributes compared one by one, in this order
    compared_names = (
        "start_p",
        "d",
        "start_q",
        "max_p",
        "max_d",
        "max_q",
        "start_P",
        "D",
        "start_Q",
        "max_P",
        "max_D",
        "max_Q",
        "max_order",
        "m",
        "seasonal",
        "stationary",
        "information_criterion",
        "alpha",
        "test",
        "seasonal_test",
        "stepwise",
        "n_jobs",
        "start_params",
        "trend",
        "method",
        "maxiter",
        "offset_test_args",
        "seasonal_test_args",
        "suppress_warnings",
        "error_action",
        "trace",
        "random",
        "random_state",
        "n_fits",
        "out_of_sample_size",
        "scoring",
        "scoring_args",
        "with_intercept",
        "kwargs",
    )
    for name in compared_names:
        assert fitted_attrs[name] == direct_attrs[name]
def __init__(self, estimator: BaseForecastEstimator = None):
    """Initializes the template with the estimator to use.

    Parameters
    ----------
    estimator : `BaseForecastEstimator` or None, default None
        Estimator passed on to the parent class initializer.
        If None, a new ``AutoArimaEstimator()`` is created for this instance.
    """
    if estimator is None:
        # Built per call on purpose: a default written in the signature is
        # evaluated once, when the function is defined, and would then be
        # the same object for every instance created without an argument.
        estimator = AutoArimaEstimator()
    super().__init__(estimator=estimator)