def test_get_regressor_cols():
    """Checks extraction of regressor columns from ``custom["extra_pred_cols"]``."""
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.df = pd.DataFrame(columns=["p1", "p2"])

    # No model components configured: no regressors.
    assert template.get_regressor_cols() is None

    # Empty custom dict: still no regressors.
    template.config.model_components_param = ModelComponentsParam(custom={})
    assert template.get_regressor_cols() is None

    # Flat list: expected to keep only df columns, dropping the time column
    # and columns absent from df ("p3").
    template.config.model_components_param = ModelComponentsParam(
        custom={
            "extra_pred_cols": [
                "p1", "p2", "p3", template.config.metadata_param.time_col]
        }
    )
    assert set(template.get_regressor_cols()) == {"p1", "p2"}

    # Nested lists with None/empty entries: expected to be flattened first.
    template.config.model_components_param = ModelComponentsParam(
        custom={"extra_pred_cols": [["p1"], ["p2", "p3"], None, []]}
    )
    assert set(template.get_regressor_cols()) == {"p1", "p2"}
def test_apply_template_decorator():
    """The template must reject any config whose model_template is not 'SK'."""
    df = generate_df_for_tests(freq="D", periods=10)["df"]
    template = SilverkiteTemplate()
    with pytest.raises(
            ValueError,
            match="SilverkiteTemplate only supports config.model_template='SK',"
                  " found 'PROPHET'"):
        template.apply_template_for_pipeline_params(
            df=df,
            config=ForecastConfig(model_template="PROPHET"))
def test_silverkite_template():
    """Tests apply_template_for_pipeline_params with the default SK config."""
    df = generate_df_for_tests(freq="D", periods=10)["df"]
    template = SilverkiteTemplate()
    config = ForecastConfig(model_template="SK")
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    # The call must not mutate the caller's config.
    assert config == ForecastConfig(model_template="SK")
    pipeline = params.pop("pipeline", None)
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    expected_params = {
        "df": df,
        "time_col": TIME_COL,
        "value_col": VALUE_COL,
        "date_format": None,
        "freq": None,
        "train_end_date": None,
        "anomaly_info": None,
        # model
        "regressor_cols": None,
        "lagged_regressor_cols": None,
        "estimator": None,
        "hyperparameter_grid": template.hyperparameter_grid,
        "hyperparameter_budget": None,
        "n_jobs": COMPUTATION_N_JOBS,
        "verbose": 1,
        # forecast
        "forecast_horizon": None,
        "coverage": None,
        "test_horizon": None,
        "periods_between_train_test": None,
        "agg_periods": None,
        "agg_func": None,
        # evaluation
        "score_func": metric.name,
        "score_func_greater_is_better": metric.get_metric_greater_is_better(),
        "cv_report_metrics": CV_REPORT_METRICS_ALL,
        "null_model_params": None,
        "relative_error_tolerance": None,
        # CV
        "cv_horizon": None,
        "cv_min_train_periods": None,
        "cv_expanding_window": True,
        "cv_periods_between_splits": None,
        "cv_periods_between_train_test": None,
        "cv_max_splits": 3,
    }
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def test_get_lagged_regressor_info():
    """Tests lagged regressor info, both with and without lagged regressors."""
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()

    # Without lagged regressors, every field is None.
    assert template.get_lagged_regressor_info() == {
        "lagged_regressor_cols": None,
        "overall_min_lag_order": None,
        "overall_max_lag_order": None}

    # With lagged regressors, columns and overall min/max lag orders are reported.
    def na_fill(s):
        return s.bfill().ffill()

    template.config.model_components_param = ModelComponentsParam(
        lagged_regressors={
            "lagged_regressor_dict": [
                {
                    "regressor2": {
                        "lag_dict": {"orders": [5]},
                        "agg_lag_dict": {
                            "orders_list": [[7, 14, 21]],
                            "interval_list": [(8, 14)]},
                        "series_na_fill_func": na_fill}
                },
                {
                    "regressor_bool": {
                        "lag_dict": {"orders": [1]},
                        "agg_lag_dict": {
                            "orders_list": [[7, 14]],
                            "interval_list": [(8, 14)]},
                        "series_na_fill_func": na_fill}
                }]
        })
    info = template.get_lagged_regressor_info()
    assert set(info["lagged_regressor_cols"]) == {"regressor2", "regressor_bool"}
    assert info["overall_min_lag_order"] == 1
    assert info["overall_max_lag_order"] == 21
def test_property():
    """Tests the template's properties and defaults."""
    template = SilverkiteTemplate()
    assert template.allow_model_template_list is False
    assert template.allow_model_components_param_list is False
    assert template.DEFAULT_MODEL_TEMPLATE == "SK"
    assert isinstance(template.estimator, SilverkiteEstimator)
    assert template.estimator.coverage is None
    assert template.apply_forecast_config_defaults().model_template == "SK"

    # A user-supplied estimator instance is stored as-is.
    custom_estimator = SilverkiteEstimator(coverage=0.99)
    assert SilverkiteTemplate(estimator=custom_estimator).estimator is custom_estimator
def test_silverkite_template_custom(model_components_param):
    """Tests simple_silverkite_template with custom parameters and data
    that has regressors."""
    df = generate_df_with_reg_for_tests(
        freq="H",
        periods=300*24,
        remove_extra_cols=True,
        mask_test_actuals=True)["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({TIME_COL: time_col, VALUE_COL: value_col}, axis=1, inplace=True)
    metric = EvaluationMetricEnum.MeanAbsoluteError

    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]})
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"}
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info)
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8},
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7)
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation))
    pipeline = params.pop("pipeline", None)
    expected_params = {
        "df": df,
        "time_col": time_col,
        "value_col": value_col,
        "date_format": metadata.date_format,
        "freq": metadata.freq,
        "train_end_date": metadata.train_end_date,
        "anomaly_info": metadata.anomaly_info,
        # model
        "regressor_cols": template.regressor_cols,
        "estimator": None,
        "hyperparameter_grid": template.hyperparameter_grid,
        "hyperparameter_budget": computation.hyperparameter_budget,
        "n_jobs": computation.n_jobs,
        "verbose": computation.verbose,
        # forecast
        "forecast_horizon": forecast_horizon,
        "coverage": coverage,
        "test_horizon": evaluation_period.test_horizon,
        "periods_between_train_test": evaluation_period.periods_between_train_test,
        "agg_periods": evaluation_metric.agg_periods,
        "agg_func": evaluation_metric.agg_func,
        "relative_error_tolerance": evaluation_metric.relative_error_tolerance,
        # evaluation
        "score_func": metric.name,
        "score_func_greater_is_better": metric.get_metric_greater_is_better(),
        "cv_report_metrics": evaluation_metric.cv_report_metrics,
        "null_model_params": evaluation_metric.null_model_params,
        # CV
        "cv_horizon": evaluation_period.cv_horizon,
        "cv_min_train_periods": evaluation_period.cv_min_train_periods,
        "cv_expanding_window": evaluation_period.cv_expanding_window,
        "cv_periods_between_splits": evaluation_period.cv_periods_between_splits,
        "cv_periods_between_train_test": evaluation_period.cv_periods_between_train_test,
        "cv_max_splits": evaluation_period.cv_max_splits,
    }
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def test_get_silverkite_hyperparameter_grid(model_components_param, silverkite, silverkite_diagnostics):
    """Tests get_hyperparameter_grid: defaults, custom components, and overrides."""
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    hyperparameter_grid = template.get_hyperparameter_grid()
    default_grid = {
        "estimator__silverkite": [SilverkiteForecast()],
        "estimator__silverkite_diagnostics": [SilverkiteDiagnostics()],
        "estimator__origin_for_time_vars": [None],
        "estimator__extra_pred_cols": [["ct1"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None}],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [pd.DataFrame({
            "name": ["tod", "tow", "tom", "toq", "toy"],
            "period": [24.0, 7.0, 1.0, 1.0, 1.0],
            "order": [3, 3, 1, 1, 5],
            "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [None],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [None],
        "estimator__uncertainty_dict": [None],
    }
    assert_equal(
        hyperparameter_grid,
        default_grid,
        ignore_keys={
            "estimator__silverkite": None,
            "estimator__silverkite_diagnostics": None})
    # The default grid holds objects that differ from the fixture instances.
    assert hyperparameter_grid["estimator__silverkite"][0] != silverkite
    assert hyperparameter_grid["estimator__silverkite_diagnostics"][0] != silverkite_diagnostics

    # Tests auto-list conversion
    template.config.model_components_param = model_components_param
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    custom_grid = {
        "estimator__silverkite": [silverkite],
        "estimator__silverkite_diagnostics": [silverkite_diagnostics],
        "estimator__origin_for_time_vars": [2020],
        "estimator__extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None}],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [None],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [{
            "method": "uniform",
            "n_changepoints": 20}],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [4],
        "estimator__uncertainty_dict": [{
            "uncertainty_method": "simple_conditional_residuals"}],
    }
    assert_equal(hyperparameter_grid, custom_grid)

    # Tests hyperparameter_override
    template.config.model_components_param.hyperparameter_override = [
        {
            "input__response__null__max_frac": 0.1,
            "estimator__min_admissible_value": [2],
            "estimator__extra_pred_cols": ["override_estimator__extra_pred_cols"],
        },
        {},
        {
            "estimator__extra_pred_cols": ["val1", "val2"],
            "estimator__origin_for_time_vars": [2019],
        },
        None]
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    custom_grid["estimator__origin_for_time_vars"] = [2020]
    # First override dict: adds a new key and replaces two existing values.
    grid_with_first_override = dict(
        custom_grid,
        input__response__null__max_frac=[0.1],
        estimator__min_admissible_value=[2],
        estimator__extra_pred_cols=[["override_estimator__extra_pred_cols"]])
    # Third override dict: replaces extra_pred_cols and the time-var origin.
    grid_with_third_override = dict(
        custom_grid,
        estimator__extra_pred_cols=[["val1", "val2"]],
        estimator__origin_for_time_vars=[2019])
    # Empty dict and None overrides leave the base grid unchanged.
    expected_grid = [
        grid_with_first_override,
        custom_grid,
        grid_with_third_override,
        custom_grid]
    assert_equal(hyperparameter_grid, expected_grid)