def test_apply_template_for_pipeline_params(df):
    mt = MyTemplate()
    config = ForecastConfig(
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
        ),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric="MeanSquaredError"))
    original_config = dataclasses.replace(config)

    # Tests apply_template_for_pipeline_params
    pipeline_params = mt.apply_template_for_pipeline_params(df=df, config=config)
    assert_equal(pipeline_params["df"], df)
    assert pipeline_params["train_end_date"] is None
    estimator = pipeline_params["pipeline"].steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    assert (
        pipeline_params["pipeline"].named_steps["input"].transformer_list[2]
        [1].named_steps["select_reg"].column_names == mt.get_regressor_cols())

    # Tests `apply_template_decorator`
    assert mt.config == mt.apply_forecast_config_defaults(config)
    assert mt.config != config  # `mt.config` has default values added
    assert config == original_config  # `config` is not modified by the function
def test_partial_test_data():
    """Tests if forecast evaluation can handle partially missing data"""
    df = pd.DataFrame({
        cst.TIME_COL: [
            "2018-01-01",
            datetime.datetime(2018, 1, 2),
            "2018-01-03",
            "2018-01-04",
            "2018-01-05"
        ],
        cst.ACTUAL_COL: [1, 2, 3, 2, np.nan],
        cst.PREDICTED_COL: [1, 4, 1, 2, 4],
        cst.PREDICTED_LOWER_COL: [1, 1, 1, 1, 2],
        cst.PREDICTED_UPPER_COL: [4, 5, 4, 4, 6],
        cst.NULL_PREDICTED_COL: [1.5, 1.5, 1.5, 1.5, 1.5]
    })
    with pytest.warns(UserWarning) as record:
        forecast = UnivariateForecast(
            df,
            train_end_date=datetime.datetime(2018, 1, 2))
        forecast2 = UnivariateForecast(
            df.iloc[:4, ],
            train_end_date=datetime.datetime(2018, 1, 2))
        assert forecast.test_na_count == 1
        assert ("1 value(s) in y_true were NA or infinite and are omitted in error calc."
                in record[0].message.args[0:2])
        assert_equal(forecast.train_evaluation, forecast2.train_evaluation)
        assert_equal(forecast.test_evaluation, forecast2.test_evaluation)
def test_null_model(X):
    """Checks null model"""
    model = BaseSilverkiteEstimator(
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        })
    model.fit(X)
    y = np.repeat(2.0, X.shape[0])
    null_score = model.null_model.score(X, y=y)
    assert null_score == mean_squared_error(y, np.repeat(9.0, X.shape[0]))

    # tests if different score function gets propagated to null model
    model = BaseSilverkiteEstimator(
        score_func=mean_absolute_error,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        })
    model.fit(X)
    y = np.repeat(2.0, X.shape[0])
    null_score = model.null_model.score(X, y=y)
    assert null_score == mean_absolute_error(y, np.repeat(9.0, X.shape[0]))

    # checks that `df` is set
    assert_equal(X, model.df)
def test_load_peyton_manning_ts():
    """Tests loading the Peyton Manning dataset as a UnivariateTimeSeries"""
    dl = DataLoaderTS()
    ts = dl.load_peyton_manning_ts()
    assert ts.original_time_col == TIME_COL
    assert ts.original_value_col == VALUE_COL
    assert ts.freq == "1D"
    assert_equal(ts.df[VALUE_COL], ts.y)
def test_prophet_hyperparameter_grid_default():
    """Tests get_hyperparameter_grid and apply_prophet_model_components_defaults"""
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    # both model_components, time_properties are None
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_holidays = template.get_prophet_holidays(
        year_list=list(range(2015 - 1, 2030 + 2)),
        countries="auto",
        lower_window=-2,
        upper_window=2)
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": ["auto"],
        "estimator__weekly_seasonality": ["auto"],
        "estimator__daily_seasonality": ["auto"],
        "estimator__add_seasonality_dict": [None],
        "estimator__holidays": [expected_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [None],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [None]
    }
    assert_equal(actual=hyperparameter_grid, expected=expected_grid)
def test_gcd_irregular():
    """Checks sort and fill missing dates"""
    # gaps in unsorted, irregular input
    df = pd.DataFrame({
        TIME_COL: [
            datetime.datetime(2018, 1, 1, 0, 0, 1),
            datetime.datetime(2018, 1, 1, 0, 0, 2),
            datetime.datetime(2018, 1, 1, 0, 0, 10),
            # intentionally out of order
            datetime.datetime(2018, 1, 1, 0, 0, 4)
        ],
        VALUE_COL: [1, 2, 3, 4]
    })
    expected = pd.DataFrame({
        # in sorted order
        TIME_COL: pd.date_range(
            start=datetime.datetime(2018, 1, 1, 0, 0, 1),
            end=datetime.datetime(2018, 1, 1, 0, 0, 10),
            freq="S"),
        VALUE_COL: [1, 2, np.nan, 4, np.nan, np.nan, np.nan, np.nan, np.nan, 3]
    })
    expected.index = expected[TIME_COL]
    expected.index.name = None
    canonical_data_dict = get_canonical_data(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        freq="S")  # the frequency should be provided when there are gaps
    assert canonical_data_dict["time_stats"]["added_timepoints"] == 6
    assert canonical_data_dict["time_stats"]["dropped_timepoints"] == 0
    assert_equal(canonical_data_dict["df"], expected)
    assert_equal(
        canonical_data_dict["time_stats"]["gaps"],
        find_missing_dates(df[TIME_COL]))
def test_plot_runtimes(valid_bm):
    bm = valid_bm

    # default value, all configs
    fig = bm.plot_runtimes()
    assert fig.layout.showlegend
    assert fig.layout.xaxis.title.text is None
    assert fig.layout.yaxis.title.text == "Mean runtime in seconds"
    assert fig.layout.title.text == "Average runtime across rolling windows"
    expected_xaxis = set(bm.configs)
    assert fig.data[0].name == "Runtime"
    assert_equal(set(fig.data[0].x), expected_xaxis)

    # custom value
    config_names = ["valid_prophet"]
    fig = bm.plot_runtimes(
        config_names=config_names,
        xlabel="xlab",
        ylabel="ylab",
        title="title",
        showlegend=False)
    assert not fig.layout.showlegend
    assert fig.layout.xaxis.title.text == "xlab"
    assert fig.layout.yaxis.title.text == "ylab"
    assert fig.layout.title.text == "title"
    expected_xaxis = set(config_names)
    assert fig.data[0].name == "Runtime"
    assert_equal(set(fig.data[0].x), expected_xaxis)
def test_prophet_hyperparameter_grid_seasonality_growth(default_holidays):
    """Tests get_hyperparameter_grid for basic seasonality, growth and other default params"""
    seasonality = {"yearly_seasonality": [True], "weekly_seasonality": [False]}
    growth = {"growth_term": ["linear"]}
    model_components = ModelComponentsParam(
        seasonality=seasonality,
        growth=growth)
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = model_components
    hyperparameter_grid = template.get_hyperparameter_grid()

    # Expected Values
    expected_holidays = default_holidays
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": [True],
        "estimator__weekly_seasonality": [False],
        "estimator__daily_seasonality": ["auto"],
        "estimator__add_seasonality_dict": [None],
        "estimator__holidays": [expected_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [None],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [None]
    }
    # Assertions
    assert_equal(actual=hyperparameter_grid, expected=expected_grid)
def test_zscore_outlier_transformer1(data):
    """Checks if outliers are properly replaced"""
    # z_cutoff=None (default)
    zscore_transform = ZscoreOutlierTransformer(use_fit_baseline=True)
    # init does not modify parameters
    assert zscore_transform.z_cutoff is None
    assert zscore_transform.use_fit_baseline is True
    assert zscore_transform.mean is None
    assert zscore_transform.std is None
    assert zscore_transform._is_fitted is None
    # doesn't need to be fit
    result = zscore_transform.transform(data)
    assert_equal(result, data)
    result = zscore_transform.fit_transform(data)
    assert_equal(result, data)
    assert zscore_transform.mean is None
    assert zscore_transform.std is None

    # z_cutoff=3.0, doesn't need to be fit
    with LogCapture(LOGGER_NAME) as log_capture:
        zscore_transform = ZscoreOutlierTransformer(z_cutoff=3.0)
        assert zscore_transform.z_cutoff == 3.0
        result = zscore_transform.transform(data)
        assert zscore_transform.mean is None
        assert zscore_transform.std is None
        expected = data.copy()
        expected.loc[6, "d"] = np.nan
        assert_equal(result, expected)
        log_capture.check((LOGGER_NAME, "INFO", "Detected 1 outlier(s)."))
    zscore_transform.fit_transform(data)
    assert zscore_transform.mean is None
    assert zscore_transform.std is None

    # z_cutoff=2.0, requires fit
    zscore_transform = ZscoreOutlierTransformer(z_cutoff=2.0, use_fit_baseline=True)
    with pytest.raises(NotFittedError, match="This instance is not fitted yet"):
        zscore_transform.transform(data)
    with LogCapture(LOGGER_NAME) as log_capture:
        result = zscore_transform.fit_transform(data)
        expected = data.copy()
        expected.loc[4, "c"] = np.nan
        expected.loc[6, "d"] = np.nan
        assert_equal(result, expected)
        log_capture.check((LOGGER_NAME, "INFO", "Detected 2 outlier(s)."))
    # uses fitted mean and std to calculate z-scores
    test_data = data + 1e5  # all values are outliers
    result = zscore_transform.transform(test_data)
    assert result.isna().all().all()

    # use_fit_baseline=False
    zscore_transform = ZscoreOutlierTransformer(z_cutoff=2.0, use_fit_baseline=False)
    result = zscore_transform.transform(test_data)
    expected = test_data.copy()
    expected.loc[4, "c"] = np.nan
    expected.loc[6, "d"] = np.nan
    assert_equal(result, expected)
def test_load_hourly_bikesharing_ts():
    """Tests loading the hourly bikesharing dataset as a UnivariateTimeSeries"""
    dl = DataLoaderTS()
    ts = dl.load_bikesharing_ts()
    assert ts.original_time_col == "ts"
    assert ts.original_value_col == "count"
    assert ts.freq == "H"
    assert ts.regressor_cols == ["tmin", "tmax", "pn"]
    assert_equal(ts.df[VALUE_COL], ts.y)
def test_no_train_end_date(df):
    """Tests if train end date can be None"""
    forecast = UnivariateForecast(df, train_end_date=None)
    forecast2 = UnivariateForecast(df, train_end_date=datetime.datetime(2018, 1, 4))
    assert_equal(forecast.train_evaluation, forecast2.train_evaluation)
    assert forecast.test_evaluation is None
def test_reorder_columns():
    """Tests reorder_columns"""
    df = pd.DataFrame(np.random.randn(3, 4), columns=list("abcd"))
    reordered_df = reorder_columns(df, order_dict=None)
    assert_equal(df, reordered_df)

    order_dict = {"a": 3, "b": -1, "c": 5, "d": 2}
    reordered_df = reorder_columns(df, order_dict=order_dict)
    assert_equal(df[["b", "d", "a", "c"]], reordered_df)
def test_auto_arima_hyperparameter_grid_default():
    """Tests get_hyperparameter_grid with default model components"""
    template = AutoArimaTemplate()
    template.config = template.apply_forecast_config_defaults()
    # model_components is None
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        # Additional parameters
        "estimator__freq": [None],
        # pmdarima fit parameters
        "estimator__start_p": [2],
        "estimator__d": [None],
        "estimator__start_q": [2],
        "estimator__max_p": [5],
        "estimator__max_d": [2],
        "estimator__max_q": [5],
        "estimator__start_P": [1],
        "estimator__D": [None],
        "estimator__start_Q": [1],
        "estimator__max_P": [2],
        "estimator__max_D": [1],
        "estimator__max_Q": [2],
        "estimator__max_order": [5],
        "estimator__m": [1],
        "estimator__seasonal": [True],
        "estimator__stationary": [False],
        "estimator__information_criterion": ["aic"],
        "estimator__alpha": [0.05],
        "estimator__test": ["kpss"],
        "estimator__seasonal_test": ["ocsb"],
        "estimator__stepwise": [True],
        "estimator__n_jobs": [1],
        "estimator__start_params": [None],
        "estimator__trend": [None],
        "estimator__method": ["lbfgs"],
        # Reduced from 50 (default value in pmdarima) for improved speed and robustness
        "estimator__maxiter": [20],
        "estimator__offset_test_args": [None],
        "estimator__seasonal_test_args": [None],
        "estimator__suppress_warnings": [True],
        "estimator__error_action": ["trace"],
        "estimator__trace": [False],
        "estimator__random": [False],
        "estimator__random_state": [None],
        "estimator__n_fits": [10],
        "estimator__out_of_sample_size": [0],
        "estimator__scoring": ["mse"],
        "estimator__scoring_args": [None],
        "estimator__with_intercept": ["auto"],
        # pmdarima predict parameters
        "estimator__return_conf_int": [True],
        "estimator__dynamic": [False]
    }
    assert_equal(actual=hyperparameter_grid, expected=expected_grid)
def test_load_hourly_beijing_pm_ts():
    """Tests loading the hourly Beijing PM dataset as a UnivariateTimeSeries"""
    dl = DataLoaderTS()
    ts = dl.load_beijing_pm_ts()
    assert ts.original_time_col == TIME_COL
    assert ts.original_value_col == "pm"
    assert ts.freq == "H"
    assert ts.regressor_cols == [
        "dewp", "temp", "pres", "cbwd", "iws", "is", "ir"]
    assert_equal(ts.df[VALUE_COL], ts.y)
def test_prophet_template_default():
    """Tests prophet_template with default values, for limited data"""
    # prepares input data
    num_days = 10
    data = generate_df_for_tests(
        freq="D",
        periods=num_days,
        train_start_date="2018-01-01")
    df = data["df"]
    template = ProphetTemplate()
    config = ForecastConfig(model_template="PROPHET")
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    # not modified
    assert config == ForecastConfig(model_template="PROPHET")

    # checks result
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        lagged_regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def test_setup(params):
    """Tests __init__ and attributes set during fit"""
    coverage = 0.90
    silverkite = SimpleSilverkiteForecast()
    model = SimpleSilverkiteEstimator(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params)
    assert model.silverkite == silverkite
    assert model.score_func == mean_squared_error
    assert model.coverage == coverage
    assert model.null_model_params is None

    # set_params must be able to replicate the init
    model2 = SimpleSilverkiteEstimator()
    model2.set_params(**dict(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params))
    assert model2.__dict__ == model.__dict__

    initialized_params = model.__dict__
    initialized_params_subset = {
        k: v for k, v in initialized_params.items()
        if k in params.keys()}
    assert_equal(initialized_params_subset, params)

    assert model.model_dict is None
    assert model.pred_cols is None
    assert model.feature_cols is None
    assert model.coef_ is None

    train_df = daily_data_reg().get("train_df").copy()
    model.fit(train_df)
    assert model.fit_algorithm_dict == {
        "fit_algorithm": "sgd",
        "fit_algorithm_params": {"alpha": 0.1}
    }
    assert model.model_dict is not None
    assert type(model.model_dict["ml_model"]) == SGDRegressor
    assert model.model_dict["ml_model"].alpha == (
        params["fit_algorithm_dict"]["fit_algorithm_params"]["alpha"])
    assert model.model_dict["training_evaluation"] is not None
    assert model.model_dict["test_evaluation"] is None
    assert model.pred_cols is not None
    assert model.feature_cols is not None
    assert_frame_equal(model.df, train_df)
    assert model.coef_ is not None
def assert_basic_pipeline_equal(actual: Pipeline, expected: Pipeline):
    """Asserts that the two pipelines are equal

    The Pipelines should be created by `get_basic_pipeline`.
    """
    # checks features
    actual_params = actual.get_params()
    expected_params = expected.get_params()
    check_keys = [
        "input__date__select_date__column_names",
        "input__response__select_val__column_names",
        "input__response__outlier__use_fit_baseline",
        "input__response__outlier__z_cutoff",
        "input__response__null__impute_algorithm",
        "input__response__null__impute_all",
        "input__response__null__impute_params",
        "input__response__null__max_frac",
        "input__regressors_numeric__select_reg__column_names",
        "input__regressors_numeric__select_reg_numeric__exclude",
        "input__regressors_numeric__select_reg_numeric__include",
        "input__regressors_numeric__outlier__use_fit_baseline",
        "input__regressors_numeric__outlier__z_cutoff",
        "input__regressors_numeric__normalize__normalize_algorithm",
        "input__regressors_numeric__normalize__normalize_params",
        "input__regressors_numeric__null__impute_algorithm",
        "input__regressors_numeric__null__impute_all",
        "input__regressors_numeric__null__impute_params",
        "input__regressors_numeric__null__max_frac",
        "input__regressors_other__select_reg__column_names",
        "input__regressors_other__select_reg_non_numeric__exclude",
        "input__regressors_other__select_reg_non_numeric__include",
        "degenerate__drop_degenerate"
    ]
    for key in check_keys:
        assert actual_params[key] == expected_params[key], \
            f"{key} is different, found {actual_params[key]}, expected {expected_params[key]}"

    # checks estimator
    actual_estimator = actual.steps[-1][-1]
    expected_estimator = expected.steps[-1][-1]
    assert isinstance(actual_estimator, type(expected_estimator))
    actual_estimator_dict = actual_estimator.__dict__.copy()
    expected_estimator_dict = expected_estimator.__dict__.copy()
    del actual_estimator_dict["null_model"]
    del expected_estimator_dict["null_model"]
    actual_estimator_dict.pop("silverkite", None)
    expected_estimator_dict.pop("silverkite", None)
    actual_estimator_dict.pop("silverkite_diagnostics", None)
    expected_estimator_dict.pop("silverkite_diagnostics", None)
    actual_score_func = actual_estimator_dict.pop("score_func")
    expected_score_func = expected_estimator_dict.pop("score_func")
    assert_equal(actual_estimator_dict, expected_estimator_dict)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        assert_eval_function_equal(actual_score_func, expected_score_func)
def test_silverkite_template():
    """Tests test_silverkite_template with default config"""
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = SilverkiteTemplate()
    config = ForecastConfig(model_template="SK")
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config
    )
    assert config == ForecastConfig(model_template="SK")  # not modified

    pipeline = params.pop("pipeline", None)
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    expected_params = dict(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def test_forecast_one_by_one_fcn():
    ts = pd.date_range(start="1/1/2018", end="1/10/2018")
    df = pd.DataFrame({"ts": ts, "y": range(10)})

    # A simple train-forecast function only for testing
    def train_forecast_func(df, value_col, time_col=None, forecast_horizon=1):
        # Gets last value and adds forecast horizon.
        # This is not meant to be a good forecast.
        # Rather, it is intended for test values to be simple enough to be derived
        # manually.
        value = df[value_col].values[-1] + forecast_horizon
        fut_df = pd.DataFrame({value_col: [value] * forecast_horizon})
        summary = {"mock summary": forecast_horizon}
        trained_model = {"summary": summary}
        return {"fut_df": fut_df, "trained_model": trained_model}

    # Forecasts with the original function ``train_forecast_func``
    forecast1 = train_forecast_func(
        df=df,
        value_col="y",
        time_col="ts",
        forecast_horizon=7)
    fut_df1 = forecast1["fut_df"]
    assert list(fut_df1["y"].values) == [16] * 7

    # Forecasts with the composed function: ``forecast_one_by_one_fcn(train_forecast_func)``
    forecast2 = forecast_one_by_one_fcn(train_forecast_func)(
        df=df,
        value_col="y",
        time_col="ts",
        forecast_horizon=7)
    # Checks if forecasted values are as expected
    fut_df2 = forecast2["fut_df"]
    assert list(fut_df2["y"].values) == list(range(10, 17))
    # Checks if the trained models are as expected
    trained_model = forecast2["trained_model"]
    assert trained_model == {"summary": {"mock summary": 7}}
    trained_models_per_horizon = forecast2["trained_models_per_horizon"]
    for k in range(1, 8):
        assert trained_models_per_horizon[k] == {"summary": {"mock summary": k}}

    # Tests `model_params`. Passes some model params directly to `forecast_one_by_one_fcn`
    forecast3 = forecast_one_by_one_fcn(
        train_forecast_func,
        df=df,
        time_col="ts",
        forecast_horizon=7)(value_col="y")
    assert_equal(forecast2, forecast3)
def generic_test_adjust_anomalous_data(value_col, adj_df_info, adj_values):
    """Generic test for the results of any given scenario"""
    augmented_df = adj_df_info["augmented_df"]
    adjusted_df = adj_df_info["adjusted_df"]
    assert list(adjusted_df.columns) == ["ts", "y", "z"]
    assert list(augmented_df.columns) == ["ts", "y", "z", f"adjusted_{value_col}"]
    assert_equal(
        adjusted_df[value_col],
        augmented_df[f"adjusted_{value_col}"],
        check_names=False)
    assert_equal(
        augmented_df[:len(adj_values)][f"adjusted_{value_col}"],
        adj_values,
        check_names=False)
def test_prophet_hyperparameter_grid_events():
    """Tests get_prophet_hyperparameter_grid for selected countries' holidays"""
    # holiday params
    start_year = 2018
    end_year = 2022
    holiday_pre_num_days = [1]
    holiday_post_num_days = [1]
    holiday_lookup_countries = ["UnitedStates", "China", "India"]
    holidays_prior_scale = [5.0, 10.0, 15.0]
    events = {
        "holiday_lookup_countries": holiday_lookup_countries,
        "holiday_pre_num_days": holiday_pre_num_days,
        "holiday_post_num_days": holiday_post_num_days,
        "start_year": start_year,
        "end_year": end_year,
        "holidays_prior_scale": holidays_prior_scale
    }
    model_components = ModelComponentsParam(events=events)
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = model_components
    hyperparameter_grid = template.get_hyperparameter_grid()

    # Expected Values
    # Holidays df, based on given holidays params
    expected_holidays = template.get_prophet_holidays(
        year_list=list(range(start_year - 1, end_year + 2)),
        countries=holiday_lookup_countries,
        lower_window=-holiday_pre_num_days[0],
        upper_window=holiday_post_num_days[0])
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": ["auto"],
        "estimator__weekly_seasonality": ["auto"],
        "estimator__daily_seasonality": ["auto"],
        "estimator__add_seasonality_dict": [None],
        "estimator__holidays": [expected_holidays],
        "estimator__holidays_prior_scale": [5.0, 10.0, 15.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [None],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [None]
    }
    # Assertions
    assert_equal(actual=hyperparameter_grid, expected=expected_grid)
def test_fit_predict(daily_data):
    """Tests fit and predict."""
    model = AutoArimaEstimator()
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None

    model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None

    with LogCapture(LOGGER_NAME) as log_capture:
        predicted = model.predict(test_df)
        assert list(predicted.columns) == [
            TIME_COL, PREDICTED_COL, PREDICTED_LOWER_COL, PREDICTED_UPPER_COL]
        assert_equal(model.last_predicted_X_, test_df)
        assert_equal(model.cached_predictions_, predicted)
        log_capture.check()  # no log messages (not using cached predictions)

    y_true = test_df[VALUE_COL]
    y_pred = predicted[PREDICTED_COL]
    err = calc_pred_err(y_true, y_pred)
    enum = EvaluationMetricEnum.Correlation
    assert err[enum.get_metric_name()] > 0.50
    enum = EvaluationMetricEnum.MeanAbsoluteError
    assert err[enum.get_metric_name()] < 2.5
    enum = EvaluationMetricEnum.RootMeanSquaredError
    assert err[enum.get_metric_name()] < 3.0
    enum = EvaluationMetricEnum.MedianAbsoluteError
    assert err[enum.get_metric_name()] < 3.0

    # Uses cached predictions
    with LogCapture(LOGGER_NAME) as log_capture:
        assert_equal(model.predict(test_df), predicted)
        log_capture.check(
            (LOGGER_NAME, LoggingLevelEnum.DEBUG.name, "Returning cached predictions."))

    # Predicts on a different dataset
    with LogCapture(LOGGER_NAME) as log_capture:
        predicted = model.predict(train_df)
        assert_equal(model.last_predicted_X_, train_df)
        assert_equal(model.cached_predictions_, predicted)
        log_capture.check()  # no log messages (not using cached predictions)

    # .fit() clears the cached result
    model.fit(train_df, time_col=TIME_COL, value_col=VALUE_COL)
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None
def test_dictionaries_values_to_lists():
    """Tests dictionaries_values_to_lists"""
    hyperparameter_grid = {
        "param1": [],
        "param2": [],
        "param3": [None],
        "param4": [None],
    }
    original_grid = hyperparameter_grid.copy()
    result = dictionaries_values_to_lists(
        hyperparameter_grid,
        hyperparameters_list_type={"param2", "param4"})
    expected_grid = {
        "param1": [],
        "param2": [[]],
        "param3": [None],
        "param4": [None],
    }
    assert_equal(result, expected_grid)
    assert_equal(hyperparameter_grid, original_grid)

    hyperparameter_grids = [hyperparameter_grid, hyperparameter_grid]
    original_grid = hyperparameter_grids.copy()
    result = dictionaries_values_to_lists(
        hyperparameter_grids,
        hyperparameters_list_type={"param2", "param4"})
    assert_equal(result, [expected_grid, expected_grid])
    assert_equal(hyperparameter_grids, original_grid)
def test_load_data_anomaly():
    """Checks anomaly_info parameter"""
    dl = DataLoaderTS()
    df = dl.load_beijing_pm()
    value_col = "pm"

    # no anomaly adjustment
    ts = UnivariateTimeSeries()
    ts.load_data(df=df, value_col=value_col)
    assert ts.df_before_adjustment is None

    # adjusts two columns
    dim_one = "one"
    dim_two = "two"
    anomaly_df = pd.DataFrame({
        START_DATE_COL: ["2011-04-04-10", "2011-10-10-00", "2012-12-20-10"],
        END_DATE_COL: ["2011-04-05-20", "2011-10-11-23", "2012-12-20-13"],
        ADJUSTMENT_DELTA_COL: [np.nan, 100.0, -100.0],
        METRIC_COL: [dim_one, dim_one, dim_two]
    })
    anomaly_info = [
        {
            "value_col": value_col,
            "anomaly_df": anomaly_df,
            "start_date_col": START_DATE_COL,
            "end_date_col": END_DATE_COL,
            "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
            "filter_by_dict": {METRIC_COL: dim_one},
            "adjustment_method": "add"
        },
        {
            "value_col": "pres",
            "anomaly_df": anomaly_df,
            "start_date_col": START_DATE_COL,
            "end_date_col": END_DATE_COL,
            "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
            "filter_by_dict": {METRIC_COL: dim_two},
            "adjustment_method": "subtract"
        }
    ]
    ts = UnivariateTimeSeries()
    ts.load_data(df=df, value_col=value_col, anomaly_info=anomaly_info)
    canonical_data_dict = get_canonical_data(
        df=df,
        value_col=value_col,
        anomaly_info=anomaly_info)
    assert_equal(ts.df, canonical_data_dict["df"])
    assert_equal(ts.df_before_adjustment, canonical_data_dict["df_before_adjustment"])
def test_extract_forecasts(valid_bm, df, valid_configs, custom_tscv):
    bm = valid_bm
    bm.extract_forecasts()
    for config_name, config in bm.result.items():
        rolling_forecast_df = config["rolling_forecast_df"]
        # Addition of train_end_date, forecast_step & split_num results in 8 columns
        assert rolling_forecast_df.shape == (
            custom_tscv.forecast_horizon * custom_tscv.max_splits, 8)

        # Checks train_end_dates column
        train_end_date_values = rolling_forecast_df["train_end_date"].values
        expected_train_end_date_values = np.repeat(
            [np.datetime64("2018-10-06"),
             np.datetime64("2018-10-13"),
             np.datetime64("2018-10-20")],
            custom_tscv.forecast_horizon)
        np.testing.assert_array_equal(train_end_date_values, expected_train_end_date_values)

        # Checks forecast_step column
        assert_equal(
            rolling_forecast_df[FORECAST_STEP_COL].values,
            np.tile(np.arange(custom_tscv.forecast_horizon) + 1, custom_tscv.max_splits))

        expected_columns = {
            "train_end_date", FORECAST_STEP_COL, "split_num", TIME_COL,
            ACTUAL_COL, PREDICTED_COL, PREDICTED_LOWER_COL, PREDICTED_UPPER_COL
        }
        assert expected_columns == set(rolling_forecast_df.columns)

    expected_columns = {
        "train_end_date", FORECAST_STEP_COL, "split_num", TIME_COL, ACTUAL_COL,
        f"valid_prophet_{PREDICTED_COL}",
        f"valid_prophet_{PREDICTED_LOWER_COL}",
        f"valid_prophet_{PREDICTED_UPPER_COL}",
        f"valid_silverkite_{PREDICTED_COL}",
        f"valid_silverkite_{PREDICTED_LOWER_COL}",
        f"valid_silverkite_{PREDICTED_UPPER_COL}"
    }
    assert set(bm.forecasts.columns) == expected_columns

    # error when `run` method has not been executed yet
    with pytest.raises(
            ValueError,
            match="Please execute 'run' method to create forecasts."):
        bm = BenchmarkForecastConfig(df=df, configs=valid_configs, tscv=custom_tscv)
        bm.extract_forecasts()
def test_get_evaluation_metrics(valid_bm, metric_dict, df, valid_configs, custom_tscv):
    bm = valid_bm

    # default value, all configs
    with pytest.warns(UserWarning):
        evaluation_metrics_df = bm.get_evaluation_metrics(metric_dict=metric_dict)
        expected_columns = {"config_name", "split_num"}
        for metric_name in metric_dict.keys():
            expected_columns = expected_columns.union(
                {f"train_{metric_name}", f"test_{metric_name}"})
        assert set(evaluation_metrics_df.columns) == expected_columns
        expected_row_num = len(bm.configs) * bm.tscv.max_splits
        assert evaluation_metrics_df.shape[0] == expected_row_num
        # check metric values
        assert_equal(evaluation_metrics_df["train_MSE"].values,
                     evaluation_metrics_df["train_custom_MSE"].values)
        assert_equal(evaluation_metrics_df["test_MSE"].values,
                     evaluation_metrics_df["test_custom_MSE"].values)
        assert evaluation_metrics_df["train_corr"].dropna().between(-1, 1).all()
        assert evaluation_metrics_df["test_corr"].dropna().between(-1, 1).all()

    # custom config value
    with pytest.warns(UserWarning):
        config_names = ["valid_silverkite"]
        evaluation_metrics_df = bm.get_evaluation_metrics(
            metric_dict=metric_dict,
            config_names=config_names)
        # columns remain the same
        expected_columns = {"config_name", "split_num"}
        for metric_name in metric_dict.keys():
            expected_columns = expected_columns.union(
                {f"train_{metric_name}", f"test_{metric_name}"})
        assert set(evaluation_metrics_df.columns) == expected_columns
        # number of rows change
        expected_row_num = len(config_names) * bm.tscv.max_splits
        assert evaluation_metrics_df.shape[0] == expected_row_num

    # error when `run` method has not been executed yet
    with pytest.raises(ValueError, match="Please execute the 'run' method "
                                         "before computing evaluation metrics."):
        bm = BenchmarkForecastConfig(df=df, configs=valid_configs, tscv=custom_tscv)
        bm.get_evaluation_metrics(metric_dict=metric_dict)
def test_get_weight_matrix():
    """Tests get_weight_matrix"""
    # weights is None
    wmat = get_weight_matrix(
        weights=None,
        n_forecasts=3,
        name="weight_bias",
        weight_auto=None)
    assert_equal(np.eye(3), wmat)

    # weights is 'auto', weight_auto is None
    wmat = get_weight_matrix(
        weights="auto",
        n_forecasts=3,
        name="weight_bias",
        weight_auto=None)
    assert_equal(np.eye(3), wmat)

    # weights is 'auto', weight_auto is not None
    wmat = get_weight_matrix(
        weights="auto",
        n_forecasts=3,
        name="weight_bias",
        weight_auto=np.ones((3, 3)))
    assert_equal(np.ones((3, 3)), wmat)

    # weights is a list
    wmat = get_weight_matrix(
        weights=[1, 2, 3],
        n_forecasts=3,
        name="weight_bias",
        weight_auto=None)
    assert_equal(np.diag([1, 2, 3]), wmat)

    # weights is an array
    wmat = get_weight_matrix(
        weights=np.array([1, 2, 3]),
        n_forecasts=3,
        name="weight_bias",
        weight_auto=None)
    assert_equal(np.diag([1, 2, 3]), wmat)

    # exception
    with pytest.raises(
            ValueError,
            match="Expected square matrix with size 10, but `weight_bias` "
                  "has weight matrix with shape \\(3, 3\\)"):
        get_weight_matrix(
            weights=np.array([1, 2, 3]),
            n_forecasts=10,  # doesn't match len(weights)
            name="weight_bias",
            weight_auto=None)
def test_setup2(params2):
    """Tests __init__ and attributes set during fit"""
    coverage = 0.95
    silverkite = SilverkiteForecast()
    model = SilverkiteEstimator(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params2)
    assert model.silverkite == silverkite
    assert model.score_func == mean_squared_error
    assert model.coverage == coverage
    assert model.null_model_params is None

    # set_params must be able to replicate the init
    model2 = SilverkiteEstimator()
    model2.set_params(**dict(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params2))
    assert model2.__dict__ == model.__dict__

    initialized_params = model.__dict__
    initialized_params_subset = {
        k: v for k, v in initialized_params.items()
        if k in params2.keys()}
    assert_equal(initialized_params_subset, params2)

    assert model.model_dict is None
    assert model.pred_cols is None
    assert model.feature_cols is None
    assert model.coef_ is None

    train_df = daily_data_reg().get("train_df").copy()
    model.fit(train_df)
    assert model.model_dict is not None
    assert model.model_dict["training_evaluation"] is not None
    assert model.model_dict["test_evaluation"] is None
    assert model.pred_cols is not None
    assert model.feature_cols is not None
    assert_frame_equal(model.df, train_df)
    assert model.coef_ is not None
def test_raf_transform(data):
    """Tests ReconcileAdditiveForecasts transform and fit_transform"""
    forecasts = data["forecasts"]
    actuals = data["actuals"]
    levels = data["levels"]
    order_dict = data["order_dict"]
    raf = ReconcileAdditiveForecasts()
    with pytest.raises(NotFittedError, match="Must call `fit` first."):
        raf.transform()
    raf.fit(
        forecasts=forecasts,
        actuals=actuals,
        levels=levels,
        order_dict=order_dict,
        method="mint_sample")

    # transforms training data
    raf.transform()
    reordered_forecasts = reorder_columns(forecasts, order_dict=order_dict)
    assert raf.adjusted_forecasts is not None
    # columns are returned according to `order_dict` order
    assert_equal(raf.adjusted_forecasts.columns, reordered_forecasts.columns)
    assert raf.adjusted_forecasts.shape == forecasts.shape
    assert np.linalg.norm(
        raf.constraint_matrix @ np.array(raf.adjusted_forecasts).T) < 1e-5
    assert raf.adjusted_forecasts_test is None
    assert raf.forecasts_test is None

    # transforms test data
    forecasts_test = forecasts + 1.0
    raf.transform(forecasts_test=forecasts_test)
    reordered_forecasts_test = reorder_columns(forecasts_test, order_dict=order_dict)
    assert_equal(raf.forecasts_test, reordered_forecasts_test)
    assert raf.adjusted_forecasts_test is not None
    assert_equal(raf.adjusted_forecasts_test.columns, reordered_forecasts_test.columns)
    assert raf.adjusted_forecasts_test.shape == forecasts_test.shape
    assert np.linalg.norm(
        raf.constraint_matrix @ np.array(raf.adjusted_forecasts_test).T) < 1e-5

    # fit and transform training data
    raf2 = ReconcileAdditiveForecasts()
    raf2.fit_transform(
        forecasts=forecasts,
        actuals=actuals,
        levels=levels,
        order_dict=order_dict,
        method="ols")
    assert raf2.adjusted_forecasts is not None
    assert_equal(raf2.adjusted_forecasts.columns, reordered_forecasts.columns)
    assert raf2.adjusted_forecasts.shape == forecasts.shape
    assert np.linalg.norm(
        raf2.constraint_matrix @ np.array(raf2.adjusted_forecasts).T) < 1e-5
    assert raf2.adjusted_forecasts_test is None
    assert raf2.forecasts_test is None
def test_autocomplete_metric_dict(valid_bm, metric_dict):
    bm = valid_bm
    updated_metric_dict = bm.autocomplete_metric_dict(
        metric_dict=metric_dict,
        enum_class=EvaluationMetricEnum)
    assert_equal(list(metric_dict.keys()), list(updated_metric_dict.keys()))

    invalid_metric_dict = {
        "corr": EvaluationMetricEnum.Correlation,
        "invalid_metric": 5
    }
    enum_class = EvaluationMetricEnum
    with pytest.raises(
            ValueError,
            match="Value of 'invalid_metric' should be a callable or "
                  f"a member of {enum_class}."):
        bm.autocomplete_metric_dict(
            metric_dict=invalid_metric_dict,
            enum_class=EvaluationMetricEnum)