Example #1
def test_init():
    """Tests constructor"""
    forecaster = Forecaster()
    assert forecaster.model_template_enum == ModelTemplateEnum
    assert forecaster.default_model_template_name == "SILVERKITE"
    forecaster = Forecaster(model_template_enum=MyModelTemplateEnum,
                            default_model_template_name="MYSILVERKITE")
    assert forecaster.model_template_enum == MyModelTemplateEnum
    assert forecaster.default_model_template_name == "MYSILVERKITE"
Example #2
def test_get_config_with_default_model_template_and_components():
    """Tests `__get_config_with_default_model_template_and_components`"""
    forecaster = Forecaster()
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
    )
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        model_components_param=ModelComponentsParam())

    # Overrides `default_model_template_name`, unnests `model_components_param`.
    forecaster = Forecaster(default_model_template_name="SK")
    config = ForecastConfig(model_components_param=[ModelComponentsParam()])
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
        config)
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        model_components_param=ModelComponentsParam())

    # Overrides `model_template_enum` and `default_model_template_name`
    forecaster = Forecaster(model_template_enum=MyModelTemplateEnum,
                            default_model_template_name="MYSILVERKITE")
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
    )
    assert config == ForecastConfig(
        model_template=MyModelTemplateEnum.MYSILVERKITE.name,
        model_components_param=ModelComponentsParam())
Example #3
def test_estimator_get_coef_summary_from_forecaster():
    """Tests model summary for silverkite model with missing values in value_col after everything is setup by Forecaster"""
    dl = DataLoader()
    df_pt = dl.load_peyton_manning()
    config = ForecastConfig().from_dict(
        dict(model_template=ModelTemplateEnum.SILVERKITE.name,
             forecast_horizon=10,
             metadata_param=dict(time_col="ts", value_col="y", freq="D"),
             model_components_param=dict(
                 custom={"fit_algorithm_dict": {
                     "fit_algorithm": "linear"
                 }})))
    result = Forecaster().run_forecast_config(
        df=df_pt[:365],  # shortens df to speed up
        config=config)
    summary = result.model[-1].summary()
    x = summary.get_coef_summary(is_intercept=True, return_df=True)
    assert x.shape[0] == 1
    summary.get_coef_summary(is_time_feature=True)
    summary.get_coef_summary(is_event=True)
    summary.get_coef_summary(is_trend=True)
    summary.get_coef_summary(is_interaction=True)
    x = summary.get_coef_summary(is_lag=True)
    assert x is None
    x = summary.get_coef_summary(is_trend=True,
                                 is_seasonality=False,
                                 is_interaction=False,
                                 return_df=True)
    assert all([":" not in col for col in x["Pred_col"].tolist()])
    assert "ct1" in x["Pred_col"].tolist()
    assert "sin1_ct1_yearly" not in x["Pred_col"].tolist()
    x = summary.get_coef_summary(return_df=True)
    assert x.shape[0] == summary.info_dict["coef_summary_df"].shape[0]
Example #4
def test_run_template_1():
    """Runs default template"""
    data = generate_df_for_tests(
        freq="H",
        periods=700 * 24)
    df = data["train_df"]
    forecast_horizon = data["test_df"].shape[0]

    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.037, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(0.836, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.004, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.800, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #5
def test_run_forecast_json():
    """Tests:
     - no coverage
     - hourly data (2+ years)
     - default `hyperparameter_grid` (all interaction terms enabled)
    """
    # sets random state for consistent comparison
    data = generate_df_for_tests(freq="H", periods=700 * 24)
    df = data["train_df"]

    json_str = """{
        "model_template": "SILVERKITE",
        "forecast_horizon": 3359,
        "model_components_param": {
            "custom": {
                "fit_algorithm_dict": {
                    "fit_algorithm": "linear"
                }
            }
        }
    }"""

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_json(df=df, json_str=json_str)

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.120, rel=0.03)
        assert result.backtest.test_evaluation[q80] == pytest.approx(0.863, rel=0.02)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(1.975, rel=0.02)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.786, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #6
    def __init__(self,
                 df: pd.DataFrame,
                 configs: Dict[str, ForecastConfig],
                 tscv: RollingTimeSeriesSplit,
                 forecaster: Forecaster = Forecaster()):
        self.df = df
        self.configs = configs
        self.tscv = tscv
        self.forecaster = forecaster

        self.is_run = False

        # output
        self.result = dict.fromkeys(configs.keys())
        self.forecasts = None
Example #7
def test_benchmark_class_init(df, valid_configs, custom_tscv):
    forecaster = Forecaster()
    bm = BenchmarkForecastConfig(df=df,
                                 configs=valid_configs,
                                 tscv=custom_tscv,
                                 forecaster=forecaster)

    assert_equal(bm.df, df)
    assert_equal(bm.configs, valid_configs)
    assert_equal(bm.forecaster, forecaster)
    assert not bm.is_run
    assert_equal(bm.result, dict.fromkeys(bm.configs.keys()))

    # error due to missing configs and df parameters
    with pytest.raises(
            TypeError,
            match=fr"__init__\(\) missing 2 required positional arguments: "
            fr"'df' and 'configs'"):
        BenchmarkForecastConfig(tscv=custom_tscv)
Example #8
def test_run_template_4():
    """Runs custom template with monthly data and auto-regression"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=["ct2"]),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.95, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #9
def test_estimator_plot_components_from_forecaster():
    """Tests estimator's plot_components function after the Forecaster has set everything up at the top most level"""
    # Test with real data (Female-births) via model template
    dl = DataLoader()
    data_path = dl.get_data_home(data_sub_dir="daily")
    df = dl.get_df(data_path=data_path, data_name="daily_female_births")
    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": True,
            "quarterly_seasonality": True,
            "weekly_seasonality": True,
            "daily_seasonality": False
        })
    result = Forecaster().run_forecast_config(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SILVERKITE.name,
            forecast_horizon=30,  # forecast 1 month
            coverage=0.95,  # 95% prediction intervals
            metadata_param=metadata,
            model_components_param=model_components))
    estimator = result.model.steps[-1][-1]
    assert estimator.plot_components()
Example #10
def test_run_template_2():
    """Runs custom template with all options"""
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    daily_event_df_dict = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_pred_cols = get_event_pred_cols(daily_event_df_dict)
    model_components = ModelComponentsParam(
        seasonality={
            "fs_components_df": pd.DataFrame({
                "name": ["tow", "tom", "toq", "toy"],
                "period": [7.0, 1.0, 1.0, 1.0],
                "order": [2, 1, 1, 5],
                "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
            })
        },
        events={
            "daily_event_df_dict": daily_event_df_dict
        },
        changepoints={
            "changepoints_dict": {
                "method": "auto",
                "yearly_seasonality_order": 3,
                "regularization_strength": 0.5,
                "resample_freq": "14D",
                "potential_changepoint_distance": "56D",
                "no_changepoint_proportion_from_end": 0.2
            },
            "seasonality_changepoints_dict": {
                "potential_changepoint_distance": "60D",
                "regularization_strength": 0.5,
                "no_changepoint_proportion_from_end": 0.2
            },
        },
        autoregression=None,
        uncertainty={
            "uncertainty_dict": None,
        },
        custom={
            "origin_for_time_vars": None,
            "extra_pred_cols": [["ct1"] + reg_cols + event_pred_cols],  # growth, regressors, events
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            },
            "min_admissible_value": min(df[VALUE_COL]) - abs(max(df[VALUE_COL])),
            "max_admissible_value": max(df[VALUE_COL]) * 2,
        }
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.692, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.531, rel=1e-2)
        assert result.backtest.test_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.823, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.304, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.921, rel=1e-2)
        assert result.forecast.train_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.897, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #11
    },
    custom={"fit_algorithm_dict": {
        "fit_algorithm": "ridge"
    }})  # use ridge to prevent overfitting when there are many changepoints

# Generates model config
config = ForecastConfig.from_dict(
    dict(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=365,  # forecast 1 year
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata,
        model_components_param=model_components))

# Then run with changepoint parameters
forecaster = Forecaster()
result = forecaster.run_forecast_config(df=df, config=config)

# %%
#
# .. note::
#   The automatic trend changepoint detection algorithm also supports adding additional custom trend
#   changepoints in forecasts. In the ``changepoints_dict`` parameter above, you may add the following
#   parameters to include additional trend changepoints besides the detected ones (see the sketch after this note):
#
#     - ``dates``: a list of custom trend changepoint dates, parsable by `pandas.to_datetime`. For example, ["2020-01-01", "2020-02-15"].
#     - ``combine_changepoint_min_distance``: the minimum distance allowed between a detected changepoint and a custom changepoint, default is None.
#       For example, "5D". If violated, one of them will be dropped according to the next parameter ``keep_detected``.
#     - ``keep_detected``: True or False, default False. Decides whether to keep the detected changepoint or the custom changepoint when they are too close.
#       If set to True, keeps the detected changepoint, otherwise keeps the custom changepoint.
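
# %%
# A minimal sketch of such a ``changepoints_dict``; the dates and distances below are
# illustrative assumptions, not taken from the dataset above.

changepoints_with_custom_dates = dict(
    changepoints_dict=dict(
        method="auto",
        # custom trend changepoints, added to the automatically detected ones
        dates=["2020-01-01", "2020-02-15"],
        # minimum allowed distance between a detected and a custom changepoint
        combine_changepoint_min_distance="5D",
        # when too close, keep the detected changepoint (True) or the custom one (False)
        keep_detected=False))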
Example #12
def seek_the_oracle(
    df_index,
    series,
    col,
    forecast_length,
    freq,
    prediction_interval=0.9,
    model_template='silverkite',
    growth=None,
    holiday=True,
    holiday_country="UnitedStates",
    regressors=None,
    verbose=0,
    inner_n_jobs=1,
    **kwargs
):
    """Internal. For loop or parallel version of Greykite."""
    inner_df = pd.DataFrame(
        {
            'ts': df_index,
            'y': series,
        }
    )
    if regressors is not None:
        inner_regr = regressors.copy()
        new_names = [
            'rrrr' + str(x) if x in inner_df.columns else str(x)
            for x in inner_regr.columns
        ]
        inner_regr.columns = new_names
        inner_regr.index.name = 'ts'
        inner_regr.reset_index(drop=False, inplace=True)
        inner_df = inner_df.merge(inner_regr, left_on='ts', right_on='ts', how='outer')
    metadata = MetadataParam(
        time_col="ts",  # name of the time column ("date" in example above)
        value_col="y",  # name of the value column ("sessions" in example above)
        freq=freq,  # "H" for hourly, "D" for daily, "W" for weekly, etc.
    )
    # INCLUDE forecast_length lagged mean and std of other features!
    model_template = ModelTemplateEnum.SILVERKITE.name
    forecaster = Forecaster()  # Creates forecasts and stores the result
    if regressors is not None:
        model_components = ModelComponentsParam(
            growth=growth, regressors={"regressor_cols": new_names}
        )
    else:
        model_components = ModelComponentsParam(
            growth=growth,  # 'linear', 'quadratic', 'sqrt'
        )
    computation = ComputationParam(n_jobs=inner_n_jobs, verbose=verbose)
    if holiday:  # also 'auto'
        model_components.events = {
            # These holidays as well as their pre/post dates are modeled as individual events.
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,  # all holidays in "holiday_lookup_countries"
            "holiday_lookup_countries": [
                holiday_country
            ],  # only look up holidays in the United States
            "holiday_pre_num_days": 1,  # also mark the 1 days before a holiday as holiday
            "holiday_post_num_days": 1,  # also mark the 1 days after a holiday as holiday
        }
    config = ForecastConfig(
        model_template=model_template,
        forecast_horizon=forecast_length,
        coverage=prediction_interval,
        model_components_param=model_components,
        metadata_param=metadata,
        computation_param=computation,
    )
    result = forecaster.run_forecast_config(  # result is also stored as `forecaster.forecast_result`.
        df=inner_df,
        config=config,
    )
    res_df = result.forecast.df.tail(forecast_length).drop(columns=['actual'])
    res_df['series_id'] = col
    return res_df
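
# A minimal usage sketch of `seek_the_oracle` (hedged): the toy series below is
# illustrative and not taken from the source.
import numpy as np
import pandas as pd

idx = pd.date_range("2022-01-01", periods=200, freq="D")
values = np.random.randn(200).cumsum()
res_df = seek_the_oracle(
    df_index=idx,
    series=values,
    col="my_series",
    forecast_length=14,
    freq="D")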
Example #13
def test_run_template_5():
    """Runs custom template with monthly data, auto-regression and lagged regressors"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols_all = ["regressor1", "regressor2", "regressor_categ"]
    reg_cols = ["regressor1"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols_all
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=reg_cols),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        lagged_regressors={
            "lagged_regressor_dict": [
                {"regressor2": "auto"},
                {"regressor_categ": {"lag_dict": {"orders": [5]}}}
            ]},
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.46, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
        # Checks lagged regressor columns
        actual_pred_cols = set(result.model[-1].model_dict["pred_cols"])
        actual_x_mat_cols = set(result.model[-1].model_dict["x_mat"].columns)
        expected_pred_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5'
        }
        expected_x_mat_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5[T.c2]'
        }
        assert expected_pred_cols.issubset(actual_pred_cols)
        assert expected_x_mat_cols.issubset(actual_x_mat_cols)
Example #14
def test_get_template_class():
    """Tests `__get_template_class`"""
    forecaster = Forecaster()
    assert forecaster._Forecaster__get_template_class(
    ) == SimpleSilverkiteTemplate
    assert forecaster._Forecaster__get_template_class(config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE_WEEKLY.name
    )) == SimpleSilverkiteTemplate
    assert forecaster._Forecaster__get_template_class(config=ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name)) == ProphetTemplate
    assert forecaster._Forecaster__get_template_class(config=ForecastConfig(
        model_template=ModelTemplateEnum.SK.name)) == SilverkiteTemplate

    # list `model_template`
    model_template = [
        ModelTemplateEnum.SILVERKITE.name,
        ModelTemplateEnum.SILVERKITE_DAILY_90.name,
        SimpleSilverkiteTemplateOptions()
    ]
    forecaster = Forecaster()
    assert forecaster._Forecaster__get_template_class(config=ForecastConfig(
        model_template=model_template)) == SimpleSilverkiteTemplate

    # `model_template` name is wrong
    model_template = "SOME_TEMPLATE"
    with pytest.raises(
            ValueError,
            match=f"Model Template '{model_template}' is not recognized! "
            f"Must be one of: SILVERKITE, SILVERKITE_DAILY_90, "
            f"SILVERKITE_WEEKLY, SILVERKITE_HOURLY_1, SILVERKITE_HOURLY_24, "
            f"SILVERKITE_HOURLY_168, SILVERKITE_HOURLY_336, SILVERKITE_EMPTY, "
            f"SK, PROPHET or satisfy the `SimpleSilverkiteTemplate` rules."):
        forecaster = Forecaster()
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))

    # List of `model_template` that include names not compatible with `SimpleSilverkiteTemplate`.
    model_template = [
        ModelTemplateEnum.SK.name, ModelTemplateEnum.SILVERKITE.name,
        ModelTemplateEnum.SILVERKITE_DAILY_90.name,
        SimpleSilverkiteTemplateOptions()
    ]
    with pytest.raises(
            ValueError,
            match="All model templates must use the same template class"):
        forecaster = Forecaster()
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))

    # list of `model_template` not supported by template class
    model_template = [ModelTemplateEnum.SK.name, ModelTemplateEnum.SK.name]
    with pytest.raises(
            ValueError,
            match="The template class <class "
            "'greykite.framework.templates.silverkite_template.SilverkiteTemplate'> "
            "does not allow `model_template` to be a list"):
        forecaster = Forecaster()
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))

    # List of `model_components_param` not compatible with `model_template`.
    model_template = ModelTemplateEnum.SK.name
    config = ForecastConfig(model_template=model_template,
                            model_components_param=[
                                ModelComponentsParam(),
                                ModelComponentsParam()
                            ])
    with pytest.raises(
            ValueError,
            match=
            f"Model template {model_template} does not support a list of `ModelComponentsParam`."
    ):
        forecaster = Forecaster()
        forecaster._Forecaster__get_template_class(config=config)

    # List of a single `model_components_param` is acceptable for a model template
    # that does not accept multiple `model_components_param`.
    forecaster = Forecaster()
    config = ForecastConfig(model_template=model_template,
                            model_components_param=[ModelComponentsParam()])
    forecaster._Forecaster__get_template_class(config=config)
    # List of multiple `model_components_param` is accepted by SILVERKITE
    config = ForecastConfig(model_template=ModelTemplateEnum.SILVERKITE.name,
                            model_components_param=[
                                ModelComponentsParam(),
                                ModelComponentsParam()
                            ])
    forecaster._Forecaster__get_template_class(config=config)

    # Error for unrecognized model template when there is no simple silverkite template
    model_template = "UNKNOWN"
    with pytest.raises(
            ValueError,
            match=rf"Model Template '{model_template}' is not recognized! "
            rf"Must be one of: SK, PROPHET\."):
        forecaster = Forecaster(
            model_template_enum=MissingSimpleSilverkiteTemplateEnum,
            default_model_template_name="SK",
        )
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))

    # Custom `model_template_enum`
    forecaster = Forecaster(
        model_template_enum=MyModelTemplateEnum,
        default_model_template_name="MYSILVERKITE",
    )
    assert forecaster._Forecaster__get_template_class(
    ) == MySimpleSilverkiteTemplate

    model_template = ModelTemplateEnum.PROPHET.name  # `model_template` name is wrong
    with pytest.raises(
            ValueError,
            match=f"Model Template '{model_template}' is not recognized! "
            f"Must be one of: MYSILVERKITE, SILVERKITE or satisfy the `SimpleSilverkiteTemplate` rules."
    ):
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))

    model_template = SimpleSilverkiteTemplateOptions()  # dataclass
    with LogCapture(LOGGER_NAME) as log_capture:
        forecaster._Forecaster__get_template_class(config=ForecastConfig(
            model_template=model_template))
        log_capture.check((
            LOGGER_NAME, 'DEBUG',
            'Model template SimpleSilverkiteTemplateOptions(freq=<SILVERKITE_FREQ.DAILY: '
            "'DAILY'>, seas=<SILVERKITE_SEAS.LT: 'LT'>, gr=<SILVERKITE_GR.LINEAR: "
            "'LINEAR'>, cp=<SILVERKITE_CP.NONE: 'NONE'>, hol=<SILVERKITE_HOL.NONE: "
            "'NONE'>, feaset=<SILVERKITE_FEASET.OFF: 'OFF'>, "
            "algo=<SILVERKITE_ALGO.LINEAR: 'LINEAR'>, ar=<SILVERKITE_AR.OFF: 'OFF'>, "
            "dsi=<SILVERKITE_DSI.AUTO: 'AUTO'>, wsi=<SILVERKITE_WSI.AUTO: 'AUTO'>) is "
            'not found in the template enum. Checking if model template is suitable for '
            '`SimpleSilverkiteTemplate`.'
        ), (
            LOGGER_NAME, 'DEBUG',
            'Multiple template classes could be used for the model template '
            "SimpleSilverkiteTemplateOptions(freq=<SILVERKITE_FREQ.DAILY: 'DAILY'>, "
            "seas=<SILVERKITE_SEAS.LT: 'LT'>, gr=<SILVERKITE_GR.LINEAR: 'LINEAR'>, "
            "cp=<SILVERKITE_CP.NONE: 'NONE'>, hol=<SILVERKITE_HOL.NONE: 'NONE'>, "
            "feaset=<SILVERKITE_FEASET.OFF: 'OFF'>, algo=<SILVERKITE_ALGO.LINEAR: "
            "'LINEAR'>, ar=<SILVERKITE_AR.OFF: 'OFF'>, dsi=<SILVERKITE_DSI.AUTO: "
            "'AUTO'>, wsi=<SILVERKITE_WSI.AUTO: 'AUTO'>): [<class "
            "'test_forecaster.MySimpleSilverkiteTemplate'>, <class "
            "'greykite.framework.templates.simple_silverkite_template.SimpleSilverkiteTemplate'>]"
        ), (LOGGER_NAME, 'DEBUG',
            "Using template class <class 'test_forecaster.MySimpleSilverkiteTemplate'> "
            'for the model template '
            "SimpleSilverkiteTemplateOptions(freq=<SILVERKITE_FREQ.DAILY: 'DAILY'>, "
            "seas=<SILVERKITE_SEAS.LT: 'LT'>, gr=<SILVERKITE_GR.LINEAR: 'LINEAR'>, "
            "cp=<SILVERKITE_CP.NONE: 'NONE'>, hol=<SILVERKITE_HOL.NONE: 'NONE'>, "
            "feaset=<SILVERKITE_FEASET.OFF: 'OFF'>, algo=<SILVERKITE_ALGO.LINEAR: "
            "'LINEAR'>, ar=<SILVERKITE_AR.OFF: 'OFF'>, dsi=<SILVERKITE_DSI.AUTO: "
            "'AUTO'>, wsi=<SILVERKITE_WSI.AUTO: 'AUTO'>)"))
Example #15
def benchmark_silverkite_template(
        data_name,
        df,
        forecast_horizons,
        fit_algorithms,
        max_cvs,
        metadata=None,
        evaluation_metric=None):
    """Benchmarks silverkite template and returns the output as a list

    :param data_name: str
        Name of the dataset we are performing benchmarking on
        For real datasets, the data_name matches the corresponding filename in the data/ folder
        For simulated datasets, we follow the convention "<freq>_simulated" e.g. "daily_simulated"
    :param df: pd.DataFrame
        Dataframe containing the time and value columns
    :param forecast_horizons: List[int]
        One forecast is created for every given forecast_horizon
    :param fit_algorithms: List[str]
        Names of predictive models to fit.
        Options are "linear", "lasso", "ridge", "rf" etc.
    :param max_cvs: List[int] or None
        Number of maximum CV folds to use.
    :param metadata: :class:`~greykite.framework.templates.autogen.forecast_config.MetadataParam` or None, default None
        Information about the input data. See
        :class:`~greykite.framework.templates.autogen.forecast_config.MetadataParam`.
    :param evaluation_metric: :class:`~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam` or None, default None
        What metrics to evaluate. See
        :class:`~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam`.
    :return: .csv file
        Each row of the .csv file records the following outputs from one run of the silverkite template:

            - "data_name": Fixed string "<freq>_simulated", or name of the dataset in data/ folder
            - "forecast_model_name": "silverkite_<fit_algorithm>" e.g. "silverkite_linear" or "prophet"
            - "train_period": train_period
            - "forecast_horizon": forecast_horizon
            - "fit_algorithm": fit algorithm name
            - "cv_folds": max_cv
            - "runtime_sec": runtime in seconds
            - "train_mae": Mean Absolute Error of training data in backtest
            - "train_mape": Mean Absolute Percent Error of training data in backtest
            - "test_mae": Mean Absolute Error of testing data in backtest
            - "test_mape": Mean Absolute Percent Error of testing data in backtest
    """
    benchmark_results = []

    for forecast_horizon, fit_algorithm, max_cv in itertools.product(forecast_horizons, fit_algorithms, max_cvs):
        model_components = ModelComponentsParam(
            custom={
                "fit_algorithm_dict": {
                    "fit_algorithm": fit_algorithm,
                },
                "feature_sets_enabled": True
            }
        )
        evaluation_period = EvaluationPeriodParam(
            cv_max_splits=max_cv
        )

        start_time = timeit.default_timer()
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(
            df=df,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                forecast_horizon=forecast_horizon,
                metadata_param=metadata,
                evaluation_metric_param=evaluation_metric,
                model_components_param=model_components,
                evaluation_period_param=evaluation_period,
            )
        )
        runtime = timeit.default_timer() - start_time

        output_dict = dict(
            data_name=data_name,
            forecast_model_name=f"silverkite_{fit_algorithm}",
            train_period=df.shape[0],
            forecast_horizon=forecast_horizon,
            cv_folds=result.grid_search.n_splits_,
            runtime_sec=round(runtime, 3),
            train_mae=result.backtest.train_evaluation["MAE"].round(3),
            train_mape=result.backtest.train_evaluation["MAPE"].round(3),
            test_mae=result.backtest.test_evaluation["MAE"].round(3),
            test_mape=result.backtest.test_evaluation["MAPE"].round(3)
        )
        benchmark_results.append(output_dict)

    return benchmark_results
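
# A minimal usage sketch (hedged): the data and parameter choices below are illustrative
# assumptions; `generate_df_for_tests` is the same helper used in the test examples above.
import pandas as pd

daily_data = generate_df_for_tests(freq="D", periods=400)
benchmark_rows = benchmark_silverkite_template(
    data_name="daily_simulated",
    df=daily_data["df"],
    forecast_horizons=[30],
    fit_algorithms=["linear", "ridge"],
    max_cvs=[3])
benchmark_df = pd.DataFrame(benchmark_rows)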
Example #16
def test_run_forecast_config_with_single_simple_silverkite_template():
    # The generic names of single simple silverkite templates are not added to `ModelTemplateEnum`,
    # so we test that they are recognized.
    data = generate_df_for_tests(freq="D", periods=365)
    df = data["df"]
    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=2)

    model_components = ModelComponentsParam(
        hyperparameter_override=[{
            "estimator__yearly_seasonality": 1
        }, {
            "estimator__yearly_seasonality": 2
        }])
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    single_template_class = SimpleSilverkiteTemplateOptions(
        freq=SILVERKITE_COMPONENT_KEYWORDS.FREQ.value.DAILY,
        seas=SILVERKITE_COMPONENT_KEYWORDS.SEAS.value.NONE)

    forecast_config = ForecastConfig(model_template=[
        single_template_class, "DAILY_ALGO_SGD", "SILVERKITE_DAILY_90"
    ],
                                     computation_param=computation,
                                     coverage=coverage,
                                     evaluation_metric_param=evaluation_metric,
                                     evaluation_period_param=evaluation_period,
                                     forecast_horizon=forecast_horizon,
                                     model_components_param=model_components)

    forecaster = Forecaster()
    result = forecaster.run_forecast_config(df=df, config=forecast_config)

    summary = summarize_grid_search_results(result.grid_search)
    # single_template_class is 1 template,
    # "DAILY_ALGO_SGD" is 1 template and "SILVERKITE_DAILY_90" has 4 templates.
    # With 2 items in `hyperparameter_override`, there should be a total of 12 cases.
    assert summary.shape[0] == 12

    # Tests functionality for single template class only.
    forecast_config = ForecastConfig(model_template=single_template_class,
                                     computation_param=computation,
                                     coverage=coverage,
                                     evaluation_metric_param=evaluation_metric,
                                     evaluation_period_param=evaluation_period,
                                     forecast_horizon=forecast_horizon)

    forecaster = Forecaster()
    pipeline_parameters = forecaster.apply_forecast_config(
        df=df, config=forecast_config)
    assert_equal(actual=pipeline_parameters["hyperparameter_grid"],
                 expected={
                     "estimator__time_properties": [None],
                     "estimator__origin_for_time_vars": [None],
                     "estimator__train_test_thresh": [None],
                     "estimator__training_fraction": [None],
                     "estimator__fit_algorithm_dict": [{
                         "fit_algorithm":
                         "linear",
                         "fit_algorithm_params":
                         None
                     }],
                     "estimator__holidays_to_model_separately": [[]],
                     "estimator__holiday_lookup_countries": [[]],
                     "estimator__holiday_pre_num_days": [0],
                     "estimator__holiday_post_num_days": [0],
                     "estimator__holiday_pre_post_num_dict": [None],
                     "estimator__daily_event_df_dict": [None],
                     "estimator__changepoints_dict": [None],
                     "estimator__seasonality_changepoints_dict": [None],
                     "estimator__yearly_seasonality": [0],
                     "estimator__quarterly_seasonality": [0],
                     "estimator__monthly_seasonality": [0],
                     "estimator__weekly_seasonality": [0],
                     "estimator__daily_seasonality": [0],
                     "estimator__max_daily_seas_interaction_order": [0],
                     "estimator__max_weekly_seas_interaction_order": [2],
                     "estimator__autoreg_dict": [None],
                     "estimator__min_admissible_value": [None],
                     "estimator__max_admissible_value": [None],
                     "estimator__uncertainty_dict": [None],
                     "estimator__growth_term": ["linear"],
                     "estimator__regressor_cols": [[]],
                     "estimator__feature_sets_enabled": [False],
                     "estimator__extra_pred_cols": [[]]
                 },
                 ignore_keys={"estimator__time_properties": None})
Example #17
def test_run_forecast_config_custom():
    """Tests `run_forecast_config` on weekly data with custom config:

     - numeric and categorical regressors
     - coverage
     - null model
    """
    data = generate_df_with_reg_for_tests(freq="W-MON",
                                          periods=140,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=3)

    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    forecast_config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(df=df, config=forecast_config)

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.976, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.360, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.224, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.941, rel=1e-2)
        check_forecast_pipeline_result(result,
                                       coverage=coverage,
                                       strategy=None,
                                       score_func=metric.name,
                                       greater_is_better=False)

    with pytest.raises(KeyError, match="missing_regressor"):
        model_components = ModelComponentsParam(
            regressors={"regressor_cols": ["missing_regressor"]})
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(
            df=df,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                model_components_param=model_components))
        check_forecast_pipeline_result(result,
                                       coverage=None,
                                       strategy=None,
                                       score_func=metric.get_metric_func(),
                                       greater_is_better=False)
Example #18
def test_run_forecast_config():
    """Tests `run_forecast_config`"""
    data = generate_df_for_tests(freq="H", periods=14 * 24)
    df = data["df"]

    # Checks if exception is raised
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_config(
            df=df, config=ForecastConfig(model_template="unknown_template"))
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_json(
            df=df, json_str="""{ "model_template": "unknown_template" }""")

    # All run_forecast_config* functions return the same result for the default config,
    # call forecast_pipeline, and return a result with the proper format.
    np.random.seed(123)
    forecaster = Forecaster()
    default_result = forecaster.run_forecast_config(df=df)
    score_func = EvaluationMetricEnum.MeanAbsolutePercentError.name
    check_forecast_pipeline_result(default_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_equal(forecaster.forecast_result, default_result)

    np.random.seed(123)
    forecaster = Forecaster()
    json_result = forecaster.run_forecast_json(df=df)
    check_forecast_pipeline_result(json_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_forecast_pipeline_result_equal(json_result,
                                          default_result,
                                          rel=0.02)
Example #19
def test_apply_forecast_config(df_config):
    """Tests `apply_forecast_config`"""
    df = df_config["df"]
    config = df_config["config"]
    model_template = df_config["model_template"]
    reg_cols = df_config["reg_cols"]

    # The same class can be re-used. `df` and `config` are taken from the function call
    #   to `apply_forecast_config`. Only `model_template_enum` and
    #   `default_model_template_name` are persistent in the state.
    forecaster = Forecaster()

    # no config
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pipeline_params = forecaster.apply_forecast_config(df=df)

        template_class = SimpleSilverkiteTemplate  # based on `default_model_template_name`
        expected_pipeline_params = template_class(
        ).apply_template_for_pipeline_params(df=df)
        assert_basic_pipeline_equal(pipeline_params.pop("pipeline"),
                                    expected_pipeline_params.pop("pipeline"))
        assert_equal(pipeline_params, expected_pipeline_params)
        assert forecaster.config is not None
        assert forecaster.template_class == template_class
        assert isinstance(forecaster.template, forecaster.template_class)
        assert forecaster.pipeline_params is not None

    # custom config
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pipeline_params = forecaster.apply_forecast_config(df=df,
                                                           config=config)

        template_class = ModelTemplateEnum[
            model_template].value.template_class  # SimpleSilverkiteTemplate
        expected_pipeline_params = template_class(
        ).apply_template_for_pipeline_params(df, config)
        expected_pipeline = expected_pipeline_params.pop("pipeline")
        assert_basic_pipeline_equal(pipeline_params.pop("pipeline"),
                                    expected_pipeline)
        assert_equal(pipeline_params, expected_pipeline_params)

        # Custom `model_template_enum`. Same result, because
        #   `MySimpleSilverkiteTemplate` has the same apply_template_for_pipeline_params
        #   as `SimpleSilverkiteTemplate`.
        forecaster = Forecaster(model_template_enum=MyModelTemplateEnum)
        pipeline_params = forecaster.apply_forecast_config(df=df,
                                                           config=config)
        assert_basic_pipeline_equal(pipeline_params.pop("pipeline"),
                                    expected_pipeline)
        assert_equal(pipeline_params, expected_pipeline_params)

    # `model_component` of config is incompatible with model_template
    forecaster = Forecaster()
    config = ForecastConfig(model_template=ModelTemplateEnum.PROPHET.name,
                            model_components_param=ModelComponentsParam(
                                regressors={"regressor_cols": reg_cols}))
    with pytest.raises(ValueError) as record:
        forecaster.apply_forecast_config(df=df, config=config)
    # The assert runs outside the `with` block, after the expected exception is captured.
    assert "Unexpected key(s) found: {'regressor_cols'}. The valid keys are: " \
           "dict_keys(['add_regressor_dict'])" in str(record.value)

    # metadata of config is incompatible with df
    df = df.rename(columns={
        TIME_COL: "some_time_col",
        VALUE_COL: "some_value_col"
    })
    with pytest.raises(ValueError, match="ts column is not in input data"):
        forecaster.apply_forecast_config(df=df, config=config)
Example #20
        "regularization_strength": 0.5,
        "resample_freq": "7D",
        "no_changepoint_distance_from_end": "365D"
    }
},
                                        uncertainty={
                                            "uncertainty_dict": "auto",
                                        },
                                        custom={
                                            "fit_algorithm_dict": {
                                                "fit_algorithm": "linear",
                                            },
                                        })

# Runs the forecast
forecaster = Forecaster()
result = forecaster.run_forecast_config(
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=365,  # forecasts 365 steps ahead
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata,
        model_components_param=model_components))

# %%
# Creating model summary
# ^^^^^^^^^^^^^^^^^^^^^^
# Now that we have the output from :py:meth:`~greykite.framework.templates.forecaster.Forecaster.run_forecast_config`,
# we are able to access the model summary.
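# A minimal sketch follows, mirroring Example #3 above; the ``summary()`` call on the last
# pipeline step is the same pattern used there.
summary = result.model[-1].summary()  # the fitted estimator is the last step of `result.model`
print(summary)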
Example #21
    time_col="ts",  # name of the time column ("date" in example above)
    value_col="y",  # name of the value column ("sessions" in example above)
    freq="D"  # "H" for hourly, "D" for daily, "W" for weekly, etc.
    # Any format accepted by `pandas.date_range`
)

# %%
# Create a forecast
# -----------------
# You can pick the ``PROPHET`` or ``SILVERKITE``
# forecasting model template. (see :doc:`/pages/stepbystep/0100_choose_model`).
#
# In this example, we use ``SILVERKITE``.
# You may also use ``PROPHET`` to see how a third-party library
# is leveraged in the same framework.
forecaster = Forecaster()  # Creates forecasts and stores the result
result = forecaster.run_forecast_config(  # result is also stored as `forecaster.forecast_result`.
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=365,  # forecasts 365 steps ahead
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata))

# %%
# Check results
# -------------
# The output of ``run_forecast_config`` is a forecast result object that contains
# the future forecast, historical forecast performance, and
# the original timeseries.
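# A minimal sketch of accessing those pieces; ``result.backtest``, ``result.forecast``, and
# ``forecast.df`` appear in the examples above, while the ``timeseries`` attribute is assumed here.
ts = result.timeseries        # the original time series
backtest = result.backtest    # historical forecast performance
forecast = result.forecast    # future forecast
print(forecast.df.head())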
Example #22
def test_run_prophet_template_custom():
    """Tests running prophet template through the pipeline"""
    data = generate_df_with_reg_for_tests(freq="D",
                                          periods=50,
                                          train_frac=0.8,
                                          conti_year_origin=2018,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    # select relevant columns for testing
    relevant_cols = [
        cst.TIME_COL, cst.VALUE_COL, "regressor1", "regressor2", "regressor3"
    ]
    df = data["df"][relevant_cols]
    forecast_horizon = data["fut_time_num"]

    # Model components - custom holidays; other params as defaults
    model_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})

    metadata = MetadataParam(
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        freq="D",
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=5,  # speeds up test case
        periods_between_train_test=5,
        cv_horizon=0,  # speeds up test case
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=0.95,
        model_components_param=model_components,
        evaluation_period_param=evaluation_period,
    )
    result = Forecaster().run_forecast_config(
        df=df,
        config=config,
    )

    forecast_df = result.forecast.df_test.reset_index(drop=True)
    expected_cols = [
        "ts", "actual", "forecast", "forecast_lower", "forecast_upper"
    ]
    assert list(forecast_df.columns) == expected_cols
    assert result.backtest.coverage == 0.95, "coverage is not correct"
    # NB: coverage is poor because of very small dataset size and low uncertainty_samples
    assert result.backtest.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.677, rel=1e-3), \
        "training coverage is None or less than expected"
    assert result.backtest.test_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.800, rel=1e-3), \
        "testing coverage is None or less than expected"
    assert result.backtest.train_evaluation["MSE"] == pytest.approx(3.7849, rel=1e-3), \
        "training MSE is None or more than expected"
    assert result.backtest.test_evaluation["MSE"] == pytest.approx(2.9609, rel=1e-3), \
        "testing MSE is None or more than expected"
    assert result.forecast.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.7805, rel=1e-3), \
        "forecast coverage is None or less than expected"
    assert result.forecast.train_evaluation["MSE"] == pytest.approx(4.1806, rel=1e-3), \
        "forecast MSE is None or more than expected"

    # ensure regressors were used in the model
    prophet_estimator = result.model.steps[-1][-1]
    regressors = prophet_estimator.model.extra_regressors
    assert regressors.keys() == {"regressor1", "regressor2", "regressor3"}
    assert regressors["regressor1"]["prior_scale"] == 10.0
    assert regressors["regressor1"]["standardize"] is True
    assert regressors["regressor1"]["mode"] == "additive"
    assert regressors["regressor2"]["prior_scale"] == 15.0
    assert regressors["regressor3"]["standardize"] == "auto"


def test_run_auto_arima_template_custom():
    """Tests running auto arima template through the pipeline"""
    data = generate_df_with_reg_for_tests(freq="D",
                                          periods=50,
                                          train_frac=0.8,
                                          conti_year_origin=2018,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    # select relevant columns for testing
    relevant_cols = [
        cst.TIME_COL, cst.VALUE_COL, "regressor1", "regressor2", "regressor3"
    ]
    df = data["df"][relevant_cols]
    forecast_horizon = data["fut_time_num"]

    # Model components - custom holidays; other params as defaults
    model_components = ModelComponentsParam(
        # Everything except `custom` and `hyperparameter_override` is ignored
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]},
        custom={
            "max_order": [10],
            "information_criterion": ["bic"]
        })

    metadata = MetadataParam(
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        freq="D",
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=5,  # speeds up test case
        periods_between_train_test=5,
        cv_horizon=0,  # speeds up test case
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.AUTO_ARIMA.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=0.95,
        model_components_param=model_components,
        evaluation_period_param=evaluation_period,
    )
    result = Forecaster().run_forecast_config(
        df=df,
        config=config,
    )

    forecast_df = result.forecast.df_test.reset_index(drop=True)
    expected_cols = [
        "ts", "actual", "forecast", "forecast_lower", "forecast_upper"
    ]
    assert list(forecast_df.columns) == expected_cols
    assert result.backtest.coverage == 0.95, "coverage is not correct"
    # NB: coverage is poor because of very small dataset size and low uncertainty_samples
    assert result.backtest.train_evaluation[
        cst.PREDICTION_BAND_COVERAGE] is not None
    assert result.backtest.test_evaluation[
        cst.PREDICTION_BAND_COVERAGE] is not None
    assert result.backtest.train_evaluation["MSE"] is not None
    assert result.backtest.test_evaluation["MSE"] is not None
    assert result.forecast.train_evaluation[
        cst.PREDICTION_BAND_COVERAGE] is not None
    assert result.forecast.train_evaluation["MSE"] is not None