# Imports assumed to match greykite's test-suite layout; adjust paths if needed.
import datetime

import numpy as np
import pandas as pd
import pytest

from greykite.common import constants as cst
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.common.python_utils import assert_equal
from greykite.common.testing_utils import generate_df_for_tests
from greykite.common.testing_utils import generate_df_with_reg_for_tests
from greykite.framework.constants import COMPUTATION_N_JOBS
from greykite.framework.constants import CV_REPORT_METRICS_ALL
from greykite.framework.templates.autogen.forecast_config import ComputationParam
from greykite.framework.templates.autogen.forecast_config import EvaluationMetricParam
from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.templates.prophet_template import ProphetTemplate
from greykite.framework.utils.framework_testing_utils import assert_basic_pipeline_equal


def test_apply_template_decorator():
    """Tests that `apply_template_for_pipeline_params` rejects a config
    whose `model_template` is not 'PROPHET'."""
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = ProphetTemplate()
    with pytest.raises(
            ValueError,
            match="ProphetTemplate only supports config.model_template='PROPHET', "
                  "found 'UNKNOWN'"):
        template.apply_template_for_pipeline_params(
            df=df,
            config=ForecastConfig(model_template="UNKNOWN"))
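
def test_model_template_enum_name():
    """Illustrative sketch, not part of the original suite: the enum member's
    `.name` equals the accepted string, so configs can be built as
    `ForecastConfig(model_template=ModelTemplateEnum.PROPHET.name)` (as the
    custom test below does) instead of hard-coding "PROPHET"."""
    assert ModelTemplateEnum.PROPHET.name == "PROPHET"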

def test_prophet_template_default():
    """Tests prophet_template with default values, for limited data"""
    # prepares input data
    num_days = 10
    data = generate_df_for_tests(freq="D", periods=num_days, train_start_date="2018-01-01")
    df = data["df"]
    template = ProphetTemplate()
    config = ForecastConfig(model_template="PROPHET")
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config)
    # `config` is not modified by the call
    assert config == ForecastConfig(model_template="PROPHET")
    # checks result
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        lagged_regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
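
# Illustrative sketch, not part of the original suite: the default config
# exercised above is the same object `Forecaster.run_forecast_config` consumes
# end to end. This assumes greykite's public `Forecaster` API; the function is
# deliberately not named `test_*` so pytest does not fit a model when collecting.
def run_default_prophet_forecast_example():
    from greykite.framework.templates.forecaster import Forecaster

    data = generate_df_for_tests(freq="D", periods=365)
    forecaster = Forecaster()
    # Applies the same template logic tested above, then fits and evaluates.
    result = forecaster.run_forecast_config(
        df=data["df"],
        config=ForecastConfig(model_template="PROPHET", forecast_horizon=7))
    return result.forecast.df  # actuals and forecasted values side by side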

def test_prophet_template_custom():
    """Tests prophet_template with custom values, with long range input"""
    # prepares input data
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300 * 24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        cst.TIME_COL: time_col,
        cst.VALUE_COL: value_col
    }, axis=1, inplace=True)
    # prepares params and calls template
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [adjustment_size],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = {
        # uses the renamed column, not cst.VALUE_COL, since `df` was renamed above
        "value_col": value_col,
        "anomaly_df": anomaly_df,
        "start_date_col": cst.START_DATE_COL,
        "end_date_col": cst.END_DATE_COL,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {cst.METRIC_COL: value_col},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7)
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
            "daily_seasonality": [4],
            "add_seasonality_dict": [{
                "yearly": {
                    "period": 365.25,
                    "fourier_order": 20,
                    "prior_scale": 20.0
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": 15
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": 35,
                    "prior_scale": 30.0
                }
            }]
        },
        growth={
            "growth_term": "linear"
        },
        events={
            "holiday_lookup_countries": ["UnitedStates", "UnitedKingdom", "India"],
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [3],
            "holidays_prior_scale": [5.0]
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10.0,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 20.0,
                    "mode": "multiplicative"
                },
            }]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [50],
            "changepoint_range": [0.9]
        },
        uncertainty={
            "mcmc_samples": [500],
            "uncertainty_samples": [2000]
        },
        hyperparameter_override={
            "input__response__null__impute_algorithm": "ts_interpolate",
            "input__response__null__impute_params": {"orders": [7, 14]},
            "input__regressors_numeric__normalize__normalize_algorithm": "RobustScaler",
        })
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        model_components_param=model_components,
        computation_param=computation)
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config)
    pipeline = params.pop("pipeline", None)
    # Adds start_year and end_year, derived from the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        lagged_regressor_cols=None,  # included for parity with the default test above
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
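
# Illustrative sketch, not part of the original suite: the `hyperparameter_override`
# keys used in the custom test above follow sklearn's double-underscore convention
# for addressing parameters of nested pipeline steps. The pipeline and step names
# below are hypothetical, chosen only to demonstrate the addressing scheme.
def sklearn_param_addressing_example():
    from sklearn.linear_model import LinearRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    demo = Pipeline([
        ("scaler", StandardScaler()),
        ("model", LinearRegression()),
    ])
    # "<step name>__<parameter name>", exactly the shape of the override keys above.
    demo.set_params(scaler__with_mean=False)
    return demo.get_params()["scaler__with_mean"]  # False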