def test_get_config_with_default_model_template_and_components():
    """Tests `__get_config_with_default_model_template_and_components`"""
    # Default Forecaster: falls back to the SILVERKITE template name and an
    # empty `ModelComponentsParam`.
    forecaster = Forecaster()
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components()
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        model_components_param=ModelComponentsParam())

    # Overrides `default_model_template_name`, unnests `model_components_param`.
    forecaster = Forecaster(default_model_template_name="SK")
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
        ForecastConfig(model_components_param=[ModelComponentsParam()]))
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        model_components_param=ModelComponentsParam())

    # Overrides `model_template_enum` and `default_model_template_name`.
    forecaster = Forecaster(
        model_template_enum=MyModelTemplateEnum,
        default_model_template_name="MYSILVERKITE")
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components()
    assert config == ForecastConfig(
        model_template=MyModelTemplateEnum.MYSILVERKITE.name,
        model_components_param=ModelComponentsParam())
def test_get_regressor_cols():
    """Tests get_regressor_names"""
    template = AutoArimaTemplate()
    # Neither an empty components object nor an empty `regressors` dict
    # yields any regressor columns.
    for components in (ModelComponentsParam(), ModelComponentsParam(regressors={})):
        template.config = ForecastConfig(model_components_param=components)
        assert template.get_regressor_cols() is None
def test_prophet_template_default(): """Tests prophet_template with default values, for limited data""" # prepares input data num_days = 10 data = generate_df_for_tests(freq="D", periods=num_days, train_start_date="2018-01-01") df = data["df"] template = ProphetTemplate() config = ForecastConfig(model_template="PROPHET") params = template.apply_template_for_pipeline_params(df=df, config=config) # not modified assert config == ForecastConfig(model_template="PROPHET") # checks result metric = EvaluationMetricEnum.MeanAbsolutePercentError pipeline = params.pop("pipeline", None) expected_params = dict( df=df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL, date_format=None, freq=None, train_end_date=None, anomaly_info=None, # model regressor_cols=None, lagged_regressor_cols=None, estimator=None, hyperparameter_grid=template.hyperparameter_grid, hyperparameter_budget=None, n_jobs=COMPUTATION_N_JOBS, verbose=1, # forecast forecast_horizon=None, coverage=None, test_horizon=None, periods_between_train_test=None, agg_periods=None, agg_func=None, # evaluation score_func=metric.name, score_func_greater_is_better=metric.get_metric_greater_is_better(), cv_report_metrics=CV_REPORT_METRICS_ALL, null_model_params=None, relative_error_tolerance=None, # CV cv_horizon=None, cv_min_train_periods=None, cv_expanding_window=True, cv_periods_between_splits=None, cv_periods_between_train_test=None, cv_max_splits=3) assert_basic_pipeline_equal(pipeline, template.pipeline) assert_equal(params, expected_params)
def test_silverkite_template():
    """Tests test_silverkite_template with default config"""
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = SilverkiteTemplate()
    config = ForecastConfig(model_template="SK")
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config
    )
    assert config == ForecastConfig(model_template="SK")  # not modified
    pipeline = params.pop("pipeline", None)  # pipeline is compared separately below
    # MAPE is the expected default selection metric.
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    expected_params = dict(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def test_run_template_1():
    """Runs default template"""
    data = generate_df_for_tests(freq="H", periods=700 * 24)
    df = data["train_df"]
    horizon = data["test_df"].shape[0]
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=horizon,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(df=df, config=config)
    # Pins backtest/forecast accuracy to recorded values within 1%.
    rmse_name = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
    q80_name = EvaluationMetricEnum.Quantile80.get_metric_name()
    assert result.backtest.test_evaluation[rmse_name] == pytest.approx(2.037, rel=1e-2)
    assert result.backtest.test_evaluation[q80_name] == pytest.approx(0.836, rel=1e-2)
    assert result.forecast.train_evaluation[rmse_name] == pytest.approx(2.004, rel=1e-2)
    assert result.forecast.train_evaluation[q80_name] == pytest.approx(0.800, rel=1e-2)
    check_forecast_pipeline_result(
        result,
        coverage=None,
        strategy=None,
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
        greater_is_better=False)
def __get_config_with_default_model_template_and_components(
        self,
        config: Optional[ForecastConfig] = None) -> ForecastConfig:
    """Fills in defaults for ``model_template`` and ``model_components_param``.

    - model_template : defaults to ``self.default_model_template_name``.
    - model_components_param : defaults to an empty ``ModelComponentsParam()``.
      A list containing a single element is unpacked.

    Parameters
    ----------
    config : :class:`~greykite.framework.templates.model_templates.ForecastConfig` or None
        Config object for template class to use.
        See :class:`~greykite.framework.templates.model_templates.ForecastConfig`.
        If None, uses an empty ForecastConfig.

    Returns
    -------
    config : :class:`~greykite.framework.templates.model_templates.ForecastConfig`
        Input ``config`` with default ``model_template`` populated.
        If ``config.model_template`` is None, it is set to
        ``self.default_model_template_name``. If ``config.model_components_param``
        is None, it is set to ``ModelComponentsParam()``.
    """
    if config is None:
        config = ForecastConfig()
    # NB: intentionally does NOT call `apply_forecast_config_defaults`;
    # only `model_template` and `model_components_param` are defaulted here.
    # The template class may have its own implementation of forecast config defaults.
    defaults = ForecastConfigDefaults()
    defaults.DEFAULT_MODEL_TEMPLATE = self.default_model_template_name
    config.model_template = defaults.apply_model_template_defaults(config.model_template)
    config.model_components_param = defaults.apply_model_components_defaults(config.model_components_param)
    return config
def test_apply_template_for_pipeline_params(df):
    """Tests `apply_template_for_pipeline_params` output and, via the
    decorator, that the caller's config is not mutated."""
    mt = MyTemplate()
    config = ForecastConfig(
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
        ),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric="MeanSquaredError"))
    # Shallow copy, used below to verify `config` is left untouched.
    original_config = dataclasses.replace(config)
    # Tests apply_template_for_pipeline_params
    pipeline_params = mt.apply_template_for_pipeline_params(df=df, config=config)
    assert_equal(pipeline_params["df"], df)
    assert pipeline_params["train_end_date"] is None
    # The estimator is the last step of the returned pipeline.
    estimator = pipeline_params["pipeline"].steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    # The pipeline gets its own estimator instance; the template's is unchanged.
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    assert (
        pipeline_params["pipeline"].named_steps["input"].transformer_list[2]
        [1].named_steps["select_reg"].column_names == mt.get_regressor_cols())
    # Tests `apply_template_decorator`
    assert mt.config == mt.apply_forecast_config_defaults(config)
    assert mt.config != config  # `mt.config` has default values added
    assert config == original_config  # `config` is not modified by the function
def test_get_forecast_time_properties(df):
    """Tests `get_forecast_time_properties`, with and without `train_end_date`."""
    mt = MyTemplate()
    mt.df = df
    # with `train_end_date` (masking applied)
    mt.config = ForecastConfig(
        coverage=0.9,
        forecast_horizon=20,
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
            freq="H",
            date_format="%Y-%m-%d-%H",
            train_end_date=datetime.datetime(2019, 2, 1),
        )
    )
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    time_properties = mt.get_forecast_time_properties()

    period = 3600  # seconds between observations (hourly frequency)
    # train end - train start
    time_delta = (mt.config.metadata_param.train_end_date
                  - df[mt.config.metadata_param.time_col].min())
    num_training_days = (time_delta.days
                         + (time_delta.seconds + period) / TimeEnum.ONE_DAY_IN_SECONDS.value)
    assert time_properties["num_training_days"] == num_training_days

    # without `train_end_date`
    mt.config.metadata_param.train_end_date = None
    time_properties = mt.get_forecast_time_properties()
    # by default, train end is the last date with nonnull value_col
    time_delta = (datetime.datetime(2019, 2, 26)
                  - df[mt.config.metadata_param.time_col].min())
    num_training_days = (time_delta.days
                         + (time_delta.seconds + period) / TimeEnum.ONE_DAY_IN_SECONDS.value)
    assert time_properties["num_training_days"] == num_training_days
def test_get_pipeline(df): mt = MyTemplate() # Initializes attributes needed by the function mt.regressor_cols = mt.get_regressor_cols() mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"] metric = EvaluationMetricEnum.MeanSquaredError mt.score_func = metric.name mt.score_func_greater_is_better = metric.get_metric_greater_is_better() mt.config = ForecastConfig( coverage=0.9, evaluation_metric_param=EvaluationMetricParam( cv_selection_metric=metric.name ) ) # Checks get_pipeline output pipeline = mt.get_pipeline() assert isinstance(pipeline, sklearn.pipeline.Pipeline) estimator = pipeline.steps[-1][-1] assert isinstance(estimator, SilverkiteEstimator) assert estimator.coverage == mt.config.coverage assert mt.estimator is not estimator assert mt.estimator.coverage is None expected_col_names = ["regressor1", "regressor2", "regressor_categ", "regressor_bool"] assert pipeline.named_steps["input"].transformer_list[2][1].named_steps["select_reg"].column_names == expected_col_names assert_eval_function_equal(pipeline.steps[-1][-1].score_func, metric.get_metric_func())
def test_estimator_get_coef_summary_from_forecaster():
    """Tests model summary for silverkite model with missing values in
    value_col after everything is setup by Forecaster"""
    dl = DataLoader()
    df_pt = dl.load_peyton_manning()
    config = ForecastConfig().from_dict(
        dict(model_template=ModelTemplateEnum.SILVERKITE.name,
             forecast_horizon=10,
             metadata_param=dict(time_col="ts", value_col="y", freq="D"),
             model_components_param=dict(
                 custom={"fit_algorithm_dict": {
                     "fit_algorithm": "linear"
                 }})))
    result = Forecaster().run_forecast_config(
        df=df_pt[:365],  # shortens df to speed up
        config=config)
    summary = result.model[-1].summary()
    # Exactly one intercept row in the coefficient summary.
    x = summary.get_coef_summary(is_intercept=True, return_df=True)
    assert x.shape[0] == 1
    # These filters only need to run without error (return not checked).
    summary.get_coef_summary(is_time_feature=True)
    summary.get_coef_summary(is_event=True)
    summary.get_coef_summary(is_trend=True)
    summary.get_coef_summary(is_interaction=True)
    # No lag terms in this model.
    x = summary.get_coef_summary(is_lag=True)
    assert x is None
    # Pure trend terms: no interactions (":" in name) and no yearly
    # seasonality terms; plain trend "ct1" must be present.
    x = summary.get_coef_summary(
        is_trend=True,
        is_seasonality=False,
        is_interaction=False,
        return_df=True)
    assert all([":" not in col for col in x["Pred_col"].tolist()])
    assert "ct1" in x["Pred_col"].tolist()
    assert "sin1_ct1_yearly" not in x["Pred_col"].tolist()
    # With no filter, the full coefficient summary is returned.
    x = summary.get_coef_summary(return_df=True)
    assert x.shape[0] == summary.info_dict["coef_summary_df"].shape[0]
def test_forecast_config():
    """Tests ForecastConfig dataclass"""
    # Builds a fully-populated config to exercise every top-level field.
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=MetadataParam(
            time_col="custom_time_col",
            anomaly_info=[{
                "key": "value"
            }, {
                "key2": "value2"
            }]),
        evaluation_period_param=EvaluationPeriodParam(
            test_horizon=10,
            periods_between_train_test=5,
            cv_min_train_periods=20),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name,
            cv_report_metrics=[
                EvaluationMetricEnum.MeanAbsoluteError.name,
                EvaluationMetricEnum.MeanAbsolutePercentError.name
            ],
            relative_error_tolerance=0.02),
        model_components_param=ModelComponentsParam(
            autoregression={"autoreg_dict": {
                "autoreg_param": 0
            }},
            changepoints=None,
            custom={"custom_param": 1},
            growth={"growth_param": 2},
            events={"events_param": 3},
            # includes a None entry to check it is accepted
            hyperparameter_override=[{
                "h1": 4
            }, {
                "h2": 5
            }, None],
            regressors={"names": ["regressor1", "regressor2"]},
            lagged_regressors={"lagged_regressor_dict": {
                "lag_reg_param": 0
            }},
            seasonality={"seas_param": 6},
            uncertainty={"uncertainty_param": 7}),
        computation_param=ComputationParam(n_jobs=None))
    assert_forecast_config(config)
    # Tests a string passed to `cv_report_metrics`
    assert ForecastConfig(evaluation_metric_param=EvaluationMetricParam(
        cv_report_metrics=CV_REPORT_METRICS_ALL), ).to_dict()
def test_apply_template_decorator():
    """A non-PROPHET template name must be rejected by the decorator."""
    df = generate_df_for_tests(freq="D", periods=10)["df"]
    template = ProphetTemplate()
    expected_msg = "ProphetTemplate only supports config.model_template='PROPHET', found 'UNKNOWN'"
    with pytest.raises(ValueError, match=expected_msg):
        template.apply_template_for_pipeline_params(
            df=df,
            config=ForecastConfig(model_template="UNKNOWN"))
def df_config():
    """Returns a dict holding a weekly dataframe with regressors and a
    fully-specified `ForecastConfig` for the SILVERKITE template
    (fixture-style helper for the tests below)."""
    data = generate_df_with_reg_for_tests(
        freq="W-MON",
        periods=140,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    model_template = "SILVERKITE"
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=3)
    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90
    config = ForecastConfig(
        model_template=model_template,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)
    return {
        "df": df,
        "config": config,
        "model_template": model_template,
        "reg_cols": reg_cols,
    }
def test_template_interface():
    mt = MyTemplate()
    # A freshly-constructed template has no state yet.
    assert mt.df is None
    assert mt.config is None
    assert mt.pipeline_params is None
    # List-valued `model_template` / `model_components_param` are disallowed.
    assert mt.allow_model_template_list is False
    assert mt.allow_model_components_param_list is False
    # MyTemplate reports the input row count under "value".
    frame = pandas.DataFrame({"a": [1, 2, 3]})
    result = mt.apply_template_for_pipeline_params(df=frame, config=ForecastConfig())
    assert result == {"value": frame.shape[0]}
def test_run_forecast_config():
    """Tests `run_forecast_config`"""
    data = generate_df_for_tests(freq="H", periods=14 * 24)
    df = data["df"]
    # Checks if exception is raised for an unrecognized template name,
    # both via config object and via JSON string.
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_config(
            df=df, config=ForecastConfig(model_template="unknown_template"))
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_json(
            df=df,
            json_str="""{ "model_template": "unknown_template" }""")
    # All run_forecast_config* functions return the same result for the default config,
    # call forecast_pipeline, and return a result with the proper format.
    np.random.seed(123)  # same seed for both runs so results are comparable
    forecaster = Forecaster()
    default_result = forecaster.run_forecast_config(df=df)
    score_func = EvaluationMetricEnum.MeanAbsolutePercentError.name
    check_forecast_pipeline_result(
        default_result,
        coverage=None,
        strategy=None,
        score_func=score_func,
        greater_is_better=False)
    # The forecaster caches its last result.
    assert_equal(forecaster.forecast_result, default_result)
    np.random.seed(123)
    forecaster = Forecaster()
    json_result = forecaster.run_forecast_json(df=df)
    check_forecast_pipeline_result(
        json_result,
        coverage=None,
        strategy=None,
        score_func=score_func,
        greater_is_better=False)
    assert_forecast_pipeline_result_equal(json_result, default_result, rel=0.02)
def apply_forecast_config_defaults(self,
                                   config: Optional[ForecastConfig] = None
                                   ) -> ForecastConfig:
    """Returns a copy of ``config`` with missing attributes set to defaults.

    Attribute values that are provided are kept unchanged; only absent
    sub-configs are replaced with their defaults. Other attributes are
    untouched. If ``config`` is None, a fresh ``ForecastConfig`` is created.

    Parameters
    ----------
    config : :class:`~greykite.framework.templates.autogen.forecast_config.ForecastConfig` or None
        Forecast configuration if available.
        See :class:`~greykite.framework.templates.autogen.forecast_config.ForecastConfig`.

    Returns
    -------
    config : :class:`~greykite.framework.templates.model_templates.ForecastConfig`
        A valid Forecast Config containing the provided attribute values,
        with default attribute values filled in where none were given.
    """
    if config is None:
        config = ForecastConfig()
    else:
        # Shallow copy so the caller's config is not mutated.
        config = dataclasses.replace(config)
    config.computation_param = self.apply_computation_defaults(config.computation_param)
    config.evaluation_metric_param = self.apply_evaluation_metric_defaults(config.evaluation_metric_param)
    config.evaluation_period_param = self.apply_evaluation_period_defaults(config.evaluation_period_param)
    config.metadata_param = self.apply_metadata_defaults(config.metadata_param)
    config.model_components_param = self.apply_model_components_defaults(config.model_components_param)
    config.model_template = self.apply_model_template_defaults(config.model_template)
    return config
def test_run_template_4():
    """Runs custom template with monthly data and auto-regression"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]
    # Custom components: linear fit with an extra quadratic trend column,
    # lag-1 autoregression, `uncertainty_dict` left as None.
    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=["ct2"]),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
    # Pins backtest accuracy to the recorded value (loose 10% tolerance).
    rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
    assert result.backtest.test_evaluation[rmse] == pytest.approx(4.95, rel=1e-1)
    check_forecast_pipeline_result(
        result,
        coverage=0.9,
        strategy=None,
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
        greater_is_better=False)
def test_estimator_plot_components_from_forecaster():
    """Tests estimator's plot_components function after the Forecaster has set
    everything up at the top most level"""
    # Uses real data (daily female births) via the SILVERKITE model template.
    loader = DataLoader()
    births_df = loader.get_df(
        data_path=loader.get_data_home(data_sub_dir="daily"),
        data_name="daily_female_births")
    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")
    seasonality = {
        "yearly_seasonality": True,
        "quarterly_seasonality": True,
        "weekly_seasonality": True,
        "daily_seasonality": False
    }
    result = Forecaster().run_forecast_config(
        df=births_df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SILVERKITE.name,
            forecast_horizon=30,  # forecast 1 month
            coverage=0.95,  # 95% prediction intervals
            metadata_param=metadata,
            model_components_param=ModelComponentsParam(seasonality=seasonality)))
    # The fitted estimator must produce a component plot.
    estimator = result.model.steps[-1][-1]
    assert estimator.plot_components()
def test_get_regressor_cols(): """Tests get_regressor_names""" # `add_regressor_dict` is a list of dict template = ProphetTemplate() model_components = ModelComponentsParam( regressors={ "add_regressor_dict": [{ "regressor1": { "prior_scale": 10, "standardize": True, "mode": "additive" }, "regressor2": { "prior_scale": 15, "standardize": False, "mode": "additive" }, "regressor3": {} }, None, { "regressor1": { "prior_scale": 10, "standardize": True, "mode": "additive" }, "regressor4": { "prior_scale": 15, "standardize": False, "mode": "additive" }, "regressor5": {} }] }) template.config = ForecastConfig(model_components_param=model_components) assert set(template.get_regressor_cols()) == { "regressor1", "regressor2", "regressor3", "regressor4", "regressor5" } # `add_regressor_dict` is a single dict model_components = ModelComponentsParam( regressors={ "add_regressor_dict": { "regressor1": { "prior_scale": 10, "standardize": True, "mode": "additive" }, "regressor2": { "prior_scale": 15, "standardize": False, "mode": "additive" }, "regressor3": {} } }) template.config = ForecastConfig(model_components_param=model_components) assert set(template.get_regressor_cols()) == { "regressor1", "regressor2", "regressor3" } # no regressors model_components = ModelComponentsParam() template.config = ForecastConfig(model_components_param=model_components) assert template.get_regressor_cols() is None model_components = ModelComponentsParam(regressors={}) template.config = ForecastConfig(model_components_param=model_components) assert template.get_regressor_cols() is None model_components = ModelComponentsParam( regressors={"add_regressor_dict": []}) template.config = ForecastConfig(model_components_param=model_components) assert template.get_regressor_cols() is None model_components = ModelComponentsParam( regressors={"add_regressor_dict": [{}, None]}) template.config = ForecastConfig(model_components_param=model_components) assert template.get_regressor_cols() is None
def test_run_prophet_template_custom():
    """Tests running prophet template through the pipeline"""
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=50,
        train_frac=0.8,
        conti_year_origin=2018,
        remove_extra_cols=True,
        mask_test_actuals=True)
    # select relevant columns for testing
    relevant_cols = [
        cst.TIME_COL, cst.VALUE_COL, "regressor1", "regressor2", "regressor3"
    ]
    df = data["df"][relevant_cols]
    forecast_horizon = data["fut_time_num"]
    # Model components - custom holidays; other params as defaults
    model_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})
    metadata = MetadataParam(
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        freq="D",
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=5,  # speeds up test case
        periods_between_train_test=5,
        cv_horizon=0,  # speeds up test case
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=0.95,
        model_components_param=model_components,
        evaluation_period_param=evaluation_period,
    )
    result = Forecaster().run_forecast_config(
        df=df,
        config=config,
    )
    # Forecast output has the standard column layout.
    forecast_df = result.forecast.df_test.reset_index(drop=True)
    expected_cols = [
        "ts", "actual", "forecast", "forecast_lower", "forecast_upper"
    ]
    assert list(forecast_df.columns) == expected_cols
    assert result.backtest.coverage == 0.95, "coverage is not correct"
    # NB: coverage is poor because of very small dataset size and low uncertainty_samples
    assert result.backtest.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.677, rel=1e-3), \
        "training coverage is None or less than expected"
    assert result.backtest.test_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.800, rel=1e-3), \
        "testing coverage is None or less than expected"
    assert result.backtest.train_evaluation["MSE"] == pytest.approx(3.7849, rel=1e-3), \
        "training MSE is None or more than expected"
    assert result.backtest.test_evaluation["MSE"] == pytest.approx(2.9609, rel=1e-3), \
        "testing MSE is None or more than expected"
    assert result.forecast.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.7805, rel=1e-3), \
        "forecast coverage is None or less than expected"
    assert result.forecast.train_evaluation["MSE"] == pytest.approx(4.1806, rel=1e-3), \
        "forecast MSE is None or more than expected"
    # ensure regressors were used in the model
    prophet_estimator = result.model.steps[-1][-1]
    regressors = prophet_estimator.model.extra_regressors
    assert regressors.keys() == {"regressor1", "regressor2", "regressor3"}
    assert regressors["regressor1"]["prior_scale"] == 10.0
    assert regressors["regressor1"]["standardize"] is True
    assert regressors["regressor1"]["mode"] == "additive"
    assert regressors["regressor2"]["prior_scale"] == 15.0
    # regressor3 was configured with no options; its `standardize` comes back as "auto"
    assert regressors["regressor3"]["standardize"] == "auto"
uncertainty={
        # NOTE(review): this fragment continues a `ModelComponentsParam(...)`
        # construction started in an earlier chunk — confirm against the full file.
        "uncertainty_dict": "auto",
    },
    custom={
        "fit_algorithm_dict": {
            "fit_algorithm": "linear",
        },
    })

# Runs the forecast
forecaster = Forecaster()
result = forecaster.run_forecast_config(
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=365,  # forecasts 365 steps ahead
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata,
        model_components_param=model_components))

# %%
# Creating model summary
# ^^^^^^^^^^^^^^^^^^^^^^
# Now that we have the output from :py:meth:`~greykite.framework.templates.forecaster.Forecaster.run_forecast_config`,
# we are able to access the model summary.

# Initializes the model summary class.
# ``max_colwidth`` is the maximum length of predictor names that can be displayed.
summary = result.model[-1].summary(max_colwidth=30)

# %%
# The above command creates a model summary class and derives extra information
def test_run_template_2():
    """Runs custom template with all options"""
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]
    # US holidays with "New Year's Day" modeled separately, +/- 2 day effects.
    daily_event_df_dict = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_pred_cols = get_event_pred_cols(daily_event_df_dict)
    model_components = ModelComponentsParam(
        seasonality={
            "fs_components_df": pd.DataFrame({
                "name": ["tow", "tom", "toq", "toy"],
                "period": [7.0, 1.0, 1.0, 1.0],
                "order": [2, 1, 1, 5],
                "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
            })
        },
        events={
            "daily_event_df_dict": daily_event_df_dict
        },
        changepoints={
            "changepoints_dict": {
                "method": "auto",
                "yearly_seasonality_order": 3,
                "regularization_strength": 0.5,
                "resample_freq": "14D",
                "potential_changepoint_distance": "56D",
                "no_changepoint_proportion_from_end": 0.2
            },
            "seasonality_changepoints_dict": {
                "potential_changepoint_distance": "60D",
                "regularization_strength": 0.5,
                "no_changepoint_proportion_from_end": 0.2
            },
        },
        autoregression=None,
        uncertainty={
            "uncertainty_dict": None,
        },
        custom={
            "origin_for_time_vars": None,
            "extra_pred_cols": [["ct1"] + reg_cols + event_pred_cols],  # growth, regressors, events
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            },
            "min_admissible_value": min(df[VALUE_COL]) - abs(max(df[VALUE_COL])),
            "max_admissible_value": max(df[VALUE_COL]) * 2,
        }
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
    # Pins backtest/forecast accuracy and band coverage within 1%.
    rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
    q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
    assert result.backtest.test_evaluation[rmse] == pytest.approx(2.692, rel=1e-2)
    assert result.backtest.test_evaluation[q80] == pytest.approx(1.531, rel=1e-2)
    assert result.backtest.test_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.823, rel=1e-2)
    assert result.forecast.train_evaluation[rmse] == pytest.approx(2.304, rel=1e-2)
    assert result.forecast.train_evaluation[q80] == pytest.approx(0.921, rel=1e-2)
    assert result.forecast.train_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.897, rel=1e-2)
    check_forecast_pipeline_result(
        result,
        coverage=0.9,
        strategy=None,
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
        greater_is_better=False)
def test_run_forecast_config_with_single_simple_silverkite_template():
    """Tests that single simple silverkite template objects are recognized."""
    # The generic name of single simple silverkite templates are not added to `ModelTemplateEnum`,
    # therefore we test if these are recognized.
    data = generate_df_for_tests(freq="D", periods=365)
    df = data["df"]
    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=2)
    model_components = ModelComponentsParam(
        hyperparameter_override=[{
            "estimator__yearly_seasonality": 1
        }, {
            "estimator__yearly_seasonality": 2
        }])
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90
    single_template_class = SimpleSilverkiteTemplateOptions(
        freq=SILVERKITE_COMPONENT_KEYWORDS.FREQ.value.DAILY,
        seas=SILVERKITE_COMPONENT_KEYWORDS.SEAS.value.NONE)
    # `model_template` mixes a template-options object and string names.
    forecast_config = ForecastConfig(
        model_template=[
            single_template_class, "DAILY_ALGO_SGD", "SILVERKITE_DAILY_90"
        ],
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)
    forecaster = Forecaster()
    result = forecaster.run_forecast_config(df=df, config=forecast_config)
    summary = summarize_grid_search_results(result.grid_search)
    # single_template_class is 1 template,
    # "DAILY_ALGO_SGD" is 1 template and "SILVERKITE_DAILY_90" has 4 templates.
    # With 2 items in `hyperparameter_override`, there should be a total of 12 cases.
    assert summary.shape[0] == 12
    # Tests functionality for single template class only.
    forecast_config = ForecastConfig(
        model_template=single_template_class,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon)
    forecaster = Forecaster()
    pipeline_parameters = forecaster.apply_forecast_config(
        df=df,
        config=forecast_config)
    # The resolved hyperparameter grid for the single template class.
    assert_equal(
        actual=pipeline_parameters["hyperparameter_grid"],
        expected={
            "estimator__time_properties": [None],
            "estimator__origin_for_time_vars": [None],
            "estimator__train_test_thresh": [None],
            "estimator__training_fraction": [None],
            "estimator__fit_algorithm_dict": [{
                "fit_algorithm": "linear",
                "fit_algorithm_params": None
            }],
            "estimator__holidays_to_model_separately": [[]],
            "estimator__holiday_lookup_countries": [[]],
            "estimator__holiday_pre_num_days": [0],
            "estimator__holiday_post_num_days": [0],
            "estimator__holiday_pre_post_num_dict": [None],
            "estimator__daily_event_df_dict": [None],
            "estimator__changepoints_dict": [None],
            "estimator__seasonality_changepoints_dict": [None],
            "estimator__yearly_seasonality": [0],
            "estimator__quarterly_seasonality": [0],
            "estimator__monthly_seasonality": [0],
            "estimator__weekly_seasonality": [0],
            "estimator__daily_seasonality": [0],
            "estimator__max_daily_seas_interaction_order": [0],
            "estimator__max_weekly_seas_interaction_order": [2],
            "estimator__autoreg_dict": [None],
            "estimator__min_admissible_value": [None],
            "estimator__max_admissible_value": [None],
            "estimator__uncertainty_dict": [None],
            "estimator__growth_term": ["linear"],
            "estimator__regressor_cols": [[]],
            "estimator__feature_sets_enabled": [False],
            "estimator__extra_pred_cols": [[]]
        },
        ignore_keys={"estimator__time_properties": None})
def test_silverkite_template_custom(model_components_param):
    """Tests simple_silverkite_template with custom parameters, and data
    that has regressors.

    Builds a full `ForecastConfig` (metadata with anomaly adjustment, custom
    evaluation/CV/computation settings) on hourly data with renamed columns,
    applies `SilverkiteTemplate`, and verifies the resulting pipeline
    parameters field by field.
    """
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300*24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    # Non-default column names verify that `metadata` overrides are honored.
    df.rename({
        TIME_COL: time_col,
        VALUE_COL: value_col
    }, axis=1, inplace=True)
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01
    )
    # Distinct values (1..7) make it easy to spot which field is mismapped.
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7
    )
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1
    )
    forecast_horizon = 20
    coverage = 0.7
    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation
        )
    )
    # The pipeline object is compared separately from the plain parameters.
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
def seek_the_oracle(
    df_index,
    series,
    col,
    forecast_length,
    freq,
    prediction_interval=0.9,
    model_template='silverkite',
    growth=None,
    holiday=True,
    holiday_country="UnitedStates",
    regressors=None,
    verbose=0,
    inner_n_jobs=1,
    **kwargs
):
    """Internal. For loop or parallel version of Greykite.

    Fits a Greykite forecast on a single series and returns the forecast rows.

    Parameters
    ----------
    df_index : array-like
        Datetime values for the series (becomes the ``ts`` column).
    series : array-like
        Observed values aligned with `df_index` (becomes the ``y`` column).
    col : str
        Series identifier, written to the output's ``series_id`` column.
    forecast_length : int
        Number of future periods to forecast.
    freq : str
        Pandas frequency alias ("H" for hourly, "D" for daily, "W" for weekly, etc.).
    prediction_interval : float, default 0.9
        Coverage of the prediction intervals.
    model_template : str, default 'silverkite'
        Greykite model template. The default maps to
        ``ModelTemplateEnum.SILVERKITE.name``; any other value is passed through.
    growth : dict or None
        Passed to ``ModelComponentsParam(growth=...)``.
    holiday : bool
        If truthy, models holidays (and 1 day before/after) as separate events.
    holiday_country : str
        Country whose holidays are looked up.
    regressors : pandas.DataFrame or None
        Optional regressor columns, indexed like the series.
    verbose : int
        Verbosity for Greykite's computation.
    inner_n_jobs : int
        ``n_jobs`` for Greykite's internal computation.

    Returns
    -------
    pandas.DataFrame
        The last `forecast_length` rows of Greykite's forecast frame, without
        the 'actual' column, plus a ``series_id`` column.
    """
    inner_df = pd.DataFrame(
        {
            'ts': df_index,
            'y': series,
        }
    )
    if regressors is not None:
        inner_regr = regressors.copy()
        # Prefix any regressor name that collides with `inner_df` columns so
        # all columns stay unique after the merge.
        new_names = [
            'rrrr' + str(x) if x in inner_df.columns else str(x)
            for x in inner_regr.columns
        ]
        inner_regr.columns = new_names
        inner_regr.index.name = 'ts'
        inner_regr.reset_index(drop=False, inplace=True)
        inner_df = inner_df.merge(inner_regr, left_on='ts', right_on='ts', how='outer')
    metadata = MetadataParam(
        time_col="ts",  # name of the time column ("date" in example above)
        value_col="y",  # name of the value column ("sessions" in example above)
        freq=freq,  # "H" for hourly, "D" for daily, "W" for weekly, etc.
    )
    # TODO: INCLUDE forecast_length lagged mean and std of other features!
    # Honor the caller's choice of template. Previously this line
    # unconditionally overwrote `model_template` with SILVERKITE, silently
    # ignoring the parameter; now only the default string is normalized to
    # the canonical enum name.
    if model_template == 'silverkite':
        model_template = ModelTemplateEnum.SILVERKITE.name
    forecaster = Forecaster()  # Creates forecasts and stores the result
    if regressors is not None:
        model_components = ModelComponentsParam(
            growth=growth, regressors={"regressor_cols": new_names}
        )
    else:
        model_components = ModelComponentsParam(
            growth=growth,  # 'linear', 'quadratic', 'sqrt'
        )
    computation = ComputationParam(n_jobs=inner_n_jobs, verbose=verbose)
    if holiday:  # also 'auto'
        model_components.events = {
            # These holidays as well as their pre/post dates are modeled as individual events.
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,  # all holidays in "holiday_lookup_countries"
            "holiday_lookup_countries": [
                holiday_country
            ],  # only look up holidays in the United States
            "holiday_pre_num_days": 1,  # also mark the 1 days before a holiday as holiday
            "holiday_post_num_days": 1,  # also mark the 1 days after a holiday as holiday
        }
    config = ForecastConfig(
        model_template=model_template,
        forecast_horizon=forecast_length,
        coverage=prediction_interval,
        model_components_param=model_components,
        metadata_param=metadata,
        computation_param=computation,
    )
    result = forecaster.run_forecast_config(
        # result is also stored as `forecaster.forecast_result`.
        df=inner_df,
        config=config,
    )
    res_df = result.forecast.df.tail(forecast_length).drop(columns=['actual'])
    res_df['series_id'] = col
    return res_df
def test_run_template_5(): """Runs custom template with monthly data, auto-regression and lagged regressors""" data = generate_df_with_reg_for_tests( freq="MS", periods=48, remove_extra_cols=True, mask_test_actuals=True) reg_cols_all = ["regressor1", "regressor2", "regressor_categ"] reg_cols = ["regressor1"] keep_cols = [TIME_COL, VALUE_COL] + reg_cols_all df = data["df"][keep_cols] forecast_horizon = data["test_df"].shape[0] model_components = ModelComponentsParam( custom=dict( fit_algorithm_dict=dict(fit_algorithm="linear"), extra_pred_cols=reg_cols), autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))), lagged_regressors={ "lagged_regressor_dict": [ {"regressor2": "auto"}, {"regressor_categ": {"lag_dict": {"orders": [5]}}} ]}, uncertainty=dict(uncertainty_dict=None)) config = ForecastConfig( model_template=ModelTemplateEnum.SK.name, forecast_horizon=forecast_horizon, coverage=0.9, model_components_param=model_components, ) with warnings.catch_warnings(): warnings.simplefilter("ignore") result = Forecaster().run_forecast_config( df=df, config=config, ) rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name() assert result.backtest.test_evaluation[rmse] == pytest.approx(4.46, rel=1e-1) check_forecast_pipeline_result( result, coverage=0.9, strategy=None, score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name, greater_is_better=False) # Checks lagged regressor columns actual_pred_cols = set(result.model[-1].model_dict["pred_cols"]) actual_x_mat_cols = set(result.model[-1].model_dict["x_mat"].columns) expected_pred_cols = { 'regressor1', 'y_lag1', 'regressor_categ_lag5' } expected_x_mat_cols = { 'regressor1', 'y_lag1', 'regressor_categ_lag5[T.c2]', 'regressor_categ_lag5[T.c2]' } assert expected_pred_cols.issubset(actual_pred_cols) assert expected_x_mat_cols.issubset(actual_x_mat_cols)
def valid_configs():
    """Returns a dict with two valid `ForecastConfig`s -- "valid_prophet" and
    "valid_silverkite" -- that share the same metadata, computation, and
    evaluation settings but use model-specific components.
    """
    metadata = MetadataParam(time_col=TIME_COL, value_col=VALUE_COL, freq="D")
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 2 * 7
    coverage = 0.90
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        cv_report_metrics=None,
        agg_periods=7,
        agg_func=np.mean,
        null_model_params=None)
    evaluation_period = EvaluationPeriodParam(test_horizon=2 * 7,
                                              periods_between_train_test=2 * 7,
                                              cv_horizon=1 * 7,
                                              cv_min_train_periods=8 * 7,
                                              cv_expanding_window=True,
                                              cv_periods_between_splits=7,
                                              cv_periods_between_train_test=3 * 7,
                                              cv_max_splits=2)
    # Silverkite components: scalar values (not wrapped in lists).
    silverkite_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": False,
            "weekly_seasonality": True
        },
        growth={"growth_term": "quadratic"},
        events={
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
            "holiday_lookup_countries": ["UnitedStates"],
            "holiday_pre_num_days": 3,
        },
        changepoints={
            "changepoints_dict": {
                "method": "uniform",
                "n_changepoints": 20,
            }
        },
        regressors={
            "regressor_cols": ["regressor1", "regressor2", "regressor3"]
        },
        uncertainty={
            "uncertainty_dict": "auto",
        },
        hyperparameter_override={"input__response__null__max_frac": 0.1},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "normalize": True
                },
            },
            "feature_sets_enabled": False
        })
    # Prophet components: values wrapped in single-element lists (grid format).
    prophet_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": 'additive'
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})
    valid_prophet = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=prophet_components)
    valid_silverkite = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=silverkite_components)
    configs = {
        "valid_prophet": valid_prophet,
        "valid_silverkite": valid_silverkite
    }
    return configs
def test_default_forecast_config(): """Tests an Empty ForecastConfig dataclass""" assert_default_forecast_config(ForecastConfig())
def test_prophet_template_custom():
    """Tests prophet_template with custom values, with long range input.

    Covers renamed columns, anomaly adjustment, custom seasonality/holiday/
    regressor/changepoint/uncertainty components, and hyperparameter
    overrides; verifies the resulting pipeline parameters field by field.
    """
    # prepares input data
    data = generate_df_with_reg_for_tests(freq="H",
                                          periods=300 * 24,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    # Non-default column names verify that `metadata` overrides are honored.
    df.rename({
        cst.TIME_COL: time_col,
        cst.VALUE_COL: value_col
    }, axis=1, inplace=True)
    # prepares params and calls template
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [adjustment_size],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": cst.VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": cst.START_DATE_COL,
        "end_date_col": cst.END_DATE_COL,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {
            cst.METRIC_COL: cst.VALUE_COL
        },
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[
            EvaluationMetricEnum.MedianAbsolutePercentError.name
        ],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01)
    # Distinct values (1..7) make it easy to spot which field is mismapped.
    evaluation_period = EvaluationPeriodParam(test_horizon=1,
                                              periods_between_train_test=2,
                                              cv_horizon=3,
                                              cv_min_train_periods=4,
                                              cv_expanding_window=True,
                                              cv_periods_between_splits=5,
                                              cv_periods_between_train_test=6,
                                              cv_max_splits=7)
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
            "daily_seasonality": [4],
            "add_seasonality_dict": [{
                "yearly": {
                    "period": 365.25,
                    "fourier_order": 20,
                    "prior_scale": 20.0
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": 15
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": 35,
                    "prior_scale": 30.0
                }
            }]
        },
        growth={"growth_term": "linear"},
        events={
            "holiday_lookup_countries": ["UnitedStates", "UnitedKingdom", "India"],
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [3],
            "holidays_prior_scale": [5.0]
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10.0,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 20.0,
                    "mode": 'multiplicative'
                },
            }]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [50],
            "changepoint_range": [0.9]
        },
        uncertainty={
            "mcmc_samples": [500],
            "uncertainty_samples": [2000]
        },
        hyperparameter_override={
            "input__response__null__impute_algorithm": "ts_interpolate",
            "input__response__null__impute_params": {
                "orders": [7, 14]
            },
            "input__regressors_numeric__normalize__normalize_algorithm": "RobustScaler",
        })
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(model_template=ModelTemplateEnum.PROPHET.name,
                            metadata_param=metadata,
                            forecast_horizon=forecast_horizon,
                            coverage=coverage,
                            evaluation_metric_param=evaluation_metric,
                            evaluation_period_param=evaluation_period,
                            model_components_param=model_components,
                            computation_param=computation)
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    # The pipeline object is compared separately from the plain parameters.
    pipeline = params.pop("pipeline", None)
    # Adding start_year and end_year based on the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
# Create a forecast # Specifies dataset information metadata = MetadataParam( time_col="ts", # name of the time column value_col="y", # name of the value column freq= "D" #"MS" for Montly at start date, "H" for hourly, "D" for daily, "W" for weekly, etc. ) forecaster = Forecaster() result = forecaster.run_forecast_config( df=df, config=ForecastConfig( model_template=ModelTemplateEnum.SILVERKITE.name, forecast_horizon=1000, # forecasts 100 steps ahead coverage=0.95, # 95% prediction intervals metadata_param=metadata)) ts = result.timeseries fig = ts.plot() plotly.io.show(fig) # Cross-validation grid_search = result.grid_search cv_results = summarize_grid_search_results( grid_search=grid_search, decimals=2, # The below saves space in the printed output. Remove to show all available metrics and columns. cv_report_metrics=None, column_order=[