Example #1
def test_get_config_with_default_model_template_and_components():
    """Tests `__get_config_with_default_model_template_and_components`"""
    forecaster = Forecaster()
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
    )
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        model_components_param=ModelComponentsParam())

    # Overrides `default_model_template_name`, unnests `model_components_param`.
    forecaster = Forecaster(default_model_template_name="SK")
    config = ForecastConfig(model_components_param=[ModelComponentsParam()])
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
        config)
    assert config == ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        model_components_param=ModelComponentsParam())

    # Overrides `model_template_enum` and `default_model_template_name`
    forecaster = Forecaster(model_template_enum=MyModelTemplateEnum,
                            default_model_template_name="MYSILVERKITE")
    config = forecaster._Forecaster__get_config_with_default_model_template_and_components(
    )
    assert config == ForecastConfig(
        model_template=MyModelTemplateEnum.MYSILVERKITE.name,
        model_components_param=ModelComponentsParam())
Example #2
def test_get_regressor_cols():
    """Tests get_regressor_cols"""
    template = AutoArimaTemplate()
    # no regressors
    model_components = ModelComponentsParam()
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None

    model_components = ModelComponentsParam(regressors={})
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None
Example #3
def test_prophet_template_default():
    """Tests prophet_template with default values, for limited data"""
    # prepares input data
    num_days = 10
    data = generate_df_for_tests(freq="D",
                                 periods=num_days,
                                 train_start_date="2018-01-01")
    df = data["df"]
    template = ProphetTemplate()
    config = ForecastConfig(model_template="PROPHET")
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    # not modified
    assert config == ForecastConfig(model_template="PROPHET")
    # checks result
    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        lagged_regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Example #4
def test_silverkite_template():
    """Tests test_silverkite_template with default config"""
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = SilverkiteTemplate()
    config = ForecastConfig(model_template="SK")
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config
    )
    assert config == ForecastConfig(model_template="SK")  # not modified
    pipeline = params.pop("pipeline", None)

    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    expected_params = dict(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Example #5
def test_run_template_1():
    """Runs default template"""
    data = generate_df_for_tests(
        freq="H",
        periods=700 * 24)
    df = data["train_df"]
    forecast_horizon = data["test_df"].shape[0]

    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.037, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(0.836, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.004, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.800, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #6
    def __get_config_with_default_model_template_and_components(
            self, config: Optional[ForecastConfig] = None) -> ForecastConfig:
        """Gets config with default value for `model_template` and `model_components_param` if not provided.

            - model_template : default value is ``self.default_model_template_name``.
            - model_components_param : default value is an empty ModelComponentsParam().

        Parameters
        ----------
        config : :class:`~greykite.framework.templates.model_templates.ForecastConfig` or None
            Config object for template class to use.
            See :class:`~greykite.framework.templates.model_templates.ForecastConfig`.
            If None, uses an empty ForecastConfig.

        Returns
        -------
        config : :class:`~greykite.framework.templates.model_templates.ForecastConfig`
            Input ``config`` with default ``model_template`` populated.
            If ``config.model_template`` is None, it is set to ``self.default_model_template_name``.
            If ``config.model_components_param`` is None, it is set to ``ModelComponentsParam()``.
        """
        config = config if config is not None else ForecastConfig()
        # Unpacks list of a single element and sets default value if None.
        # NB: Does not call `apply_forecast_config_defaults`.
        #   Only sets `model_template` and `model_components_param`.
        #   The template class may have its own implementation of forecast config defaults.
        forecast_config_defaults = ForecastConfigDefaults()
        forecast_config_defaults.DEFAULT_MODEL_TEMPLATE = self.default_model_template_name
        config.model_template = forecast_config_defaults.apply_model_template_defaults(
            config.model_template)
        config.model_components_param = forecast_config_defaults.apply_model_components_defaults(
            config.model_components_param)
        return config
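
A minimal sketch of the case not covered by Example #1, based on the docstring above: a caller-provided model_template is kept as-is and only the missing model_components_param is filled in (names as in Example #1; the assertions are assumptions drawn from the docstring, not from the source tests).

forecaster = Forecaster()
config = ForecastConfig(model_template="PROPHET")  # model_components_param is left as None
config = forecaster._Forecaster__get_config_with_default_model_template_and_components(config)
assert config.model_template == "PROPHET"                       # provided value preserved
assert config.model_components_param == ModelComponentsParam()  # default filled in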
Example #7
def test_apply_template_for_pipeline_params(df):
    mt = MyTemplate()
    config = ForecastConfig(metadata_param=MetadataParam(
        time_col=NEW_TIME_COL,
        value_col=NEW_VALUE_COL,
    ),
                            evaluation_metric_param=EvaluationMetricParam(
                                cv_selection_metric="MeanSquaredError"))
    original_config = dataclasses.replace(config)

    # Tests apply_template_for_pipeline_params
    pipeline_params = mt.apply_template_for_pipeline_params(df=df,
                                                            config=config)

    assert_equal(pipeline_params["df"], df)
    assert pipeline_params["train_end_date"] is None
    estimator = pipeline_params["pipeline"].steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    assert (
        pipeline_params["pipeline"].named_steps["input"].transformer_list[2]
        [1].named_steps["select_reg"].column_names == mt.get_regressor_cols())

    # Tests `apply_template_decorator`
    assert mt.config == mt.apply_forecast_config_defaults(config)
    assert mt.config != config  # `mt.config` has default values added
    assert config == original_config  # `config` is not modified by the function
Example #8
def test_get_forecast_time_properties(df):
    mt = MyTemplate()
    mt.df = df

    # with `train_end_date` (masking applied)
    mt.config = ForecastConfig(
        coverage=0.9,
        forecast_horizon=20,
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
            freq="H",
            date_format="%Y-%m-%d-%H",
            train_end_date=datetime.datetime(2019, 2, 1),
        )
    )
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    time_properties = mt.get_forecast_time_properties()

    period = 3600  # seconds between observations
    time_delta = (mt.config.metadata_param.train_end_date - df[mt.config.metadata_param.time_col].min())  # train end - train start
    num_training_days = (time_delta.days + (time_delta.seconds + period) / TimeEnum.ONE_DAY_IN_SECONDS.value)
    assert time_properties["num_training_days"] == num_training_days

    # without `train_end_date`
    mt.config.metadata_param.train_end_date = None
    time_properties = mt.get_forecast_time_properties()
    time_delta = (datetime.datetime(2019, 2, 26) - df[mt.config.metadata_param.time_col].min())  # by default, train end is the last date with nonnull value_col
    num_training_days = (time_delta.days + (time_delta.seconds + period) / TimeEnum.ONE_DAY_IN_SECONDS.value)
    assert time_properties["num_training_days"] == num_training_days
Example #9
def test_get_pipeline(df):
    mt = MyTemplate()
    # Initializes attributes needed by the function
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    metric = EvaluationMetricEnum.MeanSquaredError
    mt.score_func = metric.name
    mt.score_func_greater_is_better = metric.get_metric_greater_is_better()
    mt.config = ForecastConfig(
        coverage=0.9,
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=metric.name
        )
    )
    # Checks get_pipeline output
    pipeline = mt.get_pipeline()
    assert isinstance(pipeline, sklearn.pipeline.Pipeline)
    estimator = pipeline.steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    expected_col_names = ["regressor1", "regressor2", "regressor_categ", "regressor_bool"]
    assert pipeline.named_steps["input"].transformer_list[2][1].named_steps["select_reg"].column_names == expected_col_names
    assert_eval_function_equal(pipeline.steps[-1][-1].score_func,
                               metric.get_metric_func())
Example #10
def test_estimator_get_coef_summary_from_forecaster():
    """Tests model summary for silverkite model with missing values in value_col after everything is setup by Forecaster"""
    dl = DataLoader()
    df_pt = dl.load_peyton_manning()
    config = ForecastConfig().from_dict(
        dict(model_template=ModelTemplateEnum.SILVERKITE.name,
             forecast_horizon=10,
             metadata_param=dict(time_col="ts", value_col="y", freq="D"),
             model_components_param=dict(
                 custom={"fit_algorithm_dict": {
                     "fit_algorithm": "linear"
                 }})))
    result = Forecaster().run_forecast_config(
        df=df_pt[:365],  # shortens df to speed up
        config=config)
    summary = result.model[-1].summary()
    x = summary.get_coef_summary(is_intercept=True, return_df=True)
    assert x.shape[0] == 1
    summary.get_coef_summary(is_time_feature=True)
    summary.get_coef_summary(is_event=True)
    summary.get_coef_summary(is_trend=True)
    summary.get_coef_summary(is_interaction=True)
    x = summary.get_coef_summary(is_lag=True)
    assert x is None
    x = summary.get_coef_summary(is_trend=True,
                                 is_seasonality=False,
                                 is_interaction=False,
                                 return_df=True)
    assert all([":" not in col for col in x["Pred_col"].tolist()])
    assert "ct1" in x["Pred_col"].tolist()
    assert "sin1_ct1_yearly" not in x["Pred_col"].tolist()
    x = summary.get_coef_summary(return_df=True)
    assert x.shape[0] == summary.info_dict["coef_summary_df"].shape[0]
Example #11
def test_forecast_config():
    """Tests ForecastConfig dataclass"""
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=MetadataParam(time_col="custom_time_col",
                                     anomaly_info=[{
                                         "key": "value"
                                     }, {
                                         "key2": "value2"
                                     }]),
        evaluation_period_param=EvaluationPeriodParam(
            test_horizon=10,
            periods_between_train_test=5,
            cv_min_train_periods=20),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name,
            cv_report_metrics=[
                EvaluationMetricEnum.MeanAbsoluteError.name,
                EvaluationMetricEnum.MeanAbsolutePercentError.name
            ],
            relative_error_tolerance=0.02),
        model_components_param=ModelComponentsParam(
            autoregression={"autoreg_dict": {
                "autoreg_param": 0
            }},
            changepoints=None,
            custom={"custom_param": 1},
            growth={"growth_param": 2},
            events={"events_param": 3},
            hyperparameter_override=[{
                "h1": 4
            }, {
                "h2": 5
            }, None],
            regressors={"names": ["regressor1", "regressor2"]},
            lagged_regressors={"lagged_regressor_dict": {
                "lag_reg_param": 0
            }},
            seasonality={"seas_param": 6},
            uncertainty={"uncertainty_param": 7}),
        computation_param=ComputationParam(n_jobs=None))
    assert_forecast_config(config)

    # Tests a string passed to `cv_report_metrics`
    assert ForecastConfig(evaluation_metric_param=EvaluationMetricParam(
        cv_report_metrics=CV_REPORT_METRICS_ALL), ).to_dict()
Example #12
def test_apply_template_decorator():
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = ProphetTemplate()
    with pytest.raises(
            ValueError,
            match=
            "ProphetTemplate only supports config.model_template='PROPHET', found 'UNKNOWN'"
    ):
        template.apply_template_for_pipeline_params(
            df=df, config=ForecastConfig(model_template="UNKNOWN"))
Example #13
def df_config():
    data = generate_df_with_reg_for_tests(freq="W-MON",
                                          periods=140,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    model_template = "SILVERKITE"
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=3)
    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90
    config = ForecastConfig(model_template=model_template,
                            computation_param=computation,
                            coverage=coverage,
                            evaluation_metric_param=evaluation_metric,
                            evaluation_period_param=evaluation_period,
                            forecast_horizon=forecast_horizon,
                            model_components_param=model_components)
    return {
        "df": df,
        "config": config,
        "model_template": model_template,
        "reg_cols": reg_cols,
    }
Example #14
def test_template_interface():
    mt = MyTemplate()
    assert mt.df is None
    assert mt.config is None
    assert mt.pipeline_params is None
    assert mt.allow_model_template_list is False
    assert mt.allow_model_components_param_list is False

    df = pandas.DataFrame({"a": [1, 2, 3]})
    assert mt.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig()) == {"value": df.shape[0]}
Example #15
def test_run_forecast_config():
    """Tests `run_forecast_config`"""
    data = generate_df_for_tests(freq="H", periods=14 * 24)
    df = data["df"]

    # Checks if exception is raised
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_config(
            df=df, config=ForecastConfig(model_template="unknown_template"))
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_json(
            df=df, json_str="""{ "model_template": "unknown_template" }""")

    # All run_forecast_config* functions return the same result for the default config,
    # call forecast_pipeline, and return a result with the proper format.
    np.random.seed(123)
    forecaster = Forecaster()
    default_result = forecaster.run_forecast_config(df=df)
    score_func = EvaluationMetricEnum.MeanAbsolutePercentError.name
    check_forecast_pipeline_result(default_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_equal(forecaster.forecast_result, default_result)

    np.random.seed(123)
    forecaster = Forecaster()
    json_result = forecaster.run_forecast_json(df=df)
    check_forecast_pipeline_result(json_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_forecast_pipeline_result_equal(json_result,
                                          default_result,
                                          rel=0.02)
Example #16
    def apply_forecast_config_defaults(self,
                                       config: Optional[ForecastConfig] = None
                                       ) -> ForecastConfig:
        """Applies the default Forecast Config values to the given config.
        If an expected attribute value is provided, the value is unchanged.
        Otherwise the default value for it is used.
        Other attributes are untouched.
        If the input config is None, it creates a Forecast Config.

        Parameters
        ----------
        config : :class:`~greykite.framework.templates.autogen.forecast_config.ForecastConfig` or None
            Forecast configuration if available. See
            :class:`~greykite.framework.templates.autogen.forecast_config.ForecastConfig`.

        Returns
        -------
        config : :class:`~greykite.framework.templates.model_templates.ForecastConfig`
            A valid Forecast Config which contains the provided attribute values and the default attribute values if not.
        """
        if config is None:
            config = ForecastConfig()
        else:
            # makes a copy to avoid mutating input
            config = dataclasses.replace(config)

        config.computation_param = self.apply_computation_defaults(
            config.computation_param)
        config.evaluation_metric_param = self.apply_evaluation_metric_defaults(
            config.evaluation_metric_param)
        config.evaluation_period_param = self.apply_evaluation_period_defaults(
            config.evaluation_period_param)
        config.metadata_param = self.apply_metadata_defaults(
            config.metadata_param)
        config.model_components_param = self.apply_model_components_defaults(
            config.model_components_param)
        config.model_template = self.apply_model_template_defaults(
            config.model_template)
        return config
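
A minimal usage sketch, assuming ForecastConfigDefaults can be instantiated directly as in Example #6: attributes the caller sets are kept, the rest are filled with defaults, and the input config is not mutated because the method works on a dataclasses.replace copy.

defaults = ForecastConfigDefaults()
user_config = ForecastConfig(forecast_horizon=30)  # other attributes left unset
full_config = defaults.apply_forecast_config_defaults(user_config)
assert full_config.forecast_horizon == 30      # provided value unchanged
assert full_config.metadata_param is not None  # default filled in (assumed to be a MetadataParam())
assert user_config.metadata_param is None      # the original config is untouched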
Example #17
def test_run_template_4():
    """Runs custom template with monthly data and auto-regression"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=["ct2"]),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.95, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #18
def test_estimator_plot_components_from_forecaster():
    """Tests estimator's plot_components function after the Forecaster has set everything up at the top most level"""
    # Test with real data (Female-births) via model template
    dl = DataLoader()
    data_path = dl.get_data_home(data_sub_dir="daily")
    df = dl.get_df(data_path=data_path, data_name="daily_female_births")
    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": True,
            "quarterly_seasonality": True,
            "weekly_seasonality": True,
            "daily_seasonality": False
        })
    result = Forecaster().run_forecast_config(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SILVERKITE.name,
            forecast_horizon=30,  # forecast 1 month
            coverage=0.95,  # 95% prediction intervals
            metadata_param=metadata,
            model_components_param=model_components))
    estimator = result.model.steps[-1][-1]
    assert estimator.plot_components()
Example #19
def test_get_regressor_cols():
    """Tests get_regressor_names"""
    # `add_regressor_dict` is a list of dict
    template = ProphetTemplate()
    model_components = ModelComponentsParam(
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }, None, {
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor4": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor5": {}
            }]
        })
    template.config = ForecastConfig(model_components_param=model_components)
    assert set(template.get_regressor_cols()) == {
        "regressor1", "regressor2", "regressor3", "regressor4", "regressor5"
    }

    # `add_regressor_dict` is a single dict
    model_components = ModelComponentsParam(
        regressors={
            "add_regressor_dict": {
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }
        })
    template.config = ForecastConfig(model_components_param=model_components)
    assert set(template.get_regressor_cols()) == {
        "regressor1", "regressor2", "regressor3"
    }

    # no regressors
    model_components = ModelComponentsParam()
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None

    model_components = ModelComponentsParam(regressors={})
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None

    model_components = ModelComponentsParam(
        regressors={"add_regressor_dict": []})
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None

    model_components = ModelComponentsParam(
        regressors={"add_regressor_dict": [{}, None]})
    template.config = ForecastConfig(model_components_param=model_components)
    assert template.get_regressor_cols() is None
Example #20
def test_run_prophet_template_custom():
    """Tests running prophet template through the pipeline"""
    data = generate_df_with_reg_for_tests(freq="D",
                                          periods=50,
                                          train_frac=0.8,
                                          conti_year_origin=2018,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    # select relevant columns for testing
    relevant_cols = [
        cst.TIME_COL, cst.VALUE_COL, "regressor1", "regressor2", "regressor3"
    ]
    df = data["df"][relevant_cols]
    forecast_horizon = data["fut_time_num"]

    # Model components - custom holidays; other params as defaults
    model_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": "additive"
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": "additive"
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})

    metadata = MetadataParam(
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        freq="D",
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=5,  # speeds up test case
        periods_between_train_test=5,
        cv_horizon=0,  # speeds up test case
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=0.95,
        model_components_param=model_components,
        evaluation_period_param=evaluation_period,
    )
    result = Forecaster().run_forecast_config(
        df=df,
        config=config,
    )

    forecast_df = result.forecast.df_test.reset_index(drop=True)
    expected_cols = [
        "ts", "actual", "forecast", "forecast_lower", "forecast_upper"
    ]
    assert list(forecast_df.columns) == expected_cols
    assert result.backtest.coverage == 0.95, "coverage is not correct"
    # NB: coverage is poor because of very small dataset size and low uncertainty_samples
    assert result.backtest.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.677, rel=1e-3), \
        "training coverage is None or less than expected"
    assert result.backtest.test_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.800, rel=1e-3), \
        "testing coverage is None or less than expected"
    assert result.backtest.train_evaluation["MSE"] == pytest.approx(3.7849, rel=1e-3), \
        "training MSE is None or more than expected"
    assert result.backtest.test_evaluation["MSE"] == pytest.approx(2.9609, rel=1e-3), \
        "testing MSE is None or more than expected"
    assert result.forecast.train_evaluation[cst.PREDICTION_BAND_COVERAGE] == pytest.approx(0.7805, rel=1e-3), \
        "forecast coverage is None or less than expected"
    assert result.forecast.train_evaluation["MSE"] == pytest.approx(4.1806, rel=1e-3), \
        "forecast MSE is None or more than expected"

    # ensure regressors were used in the model
    prophet_estimator = result.model.steps[-1][-1]
    regressors = prophet_estimator.model.extra_regressors
    assert regressors.keys() == {"regressor1", "regressor2", "regressor3"}
    assert regressors["regressor1"]["prior_scale"] == 10.0
    assert regressors["regressor1"]["standardize"] is True
    assert regressors["regressor1"]["mode"] == "additive"
    assert regressors["regressor2"]["prior_scale"] == 15.0
    assert regressors["regressor3"]["standardize"] == "auto"
Example #21
                                        uncertainty={
                                            "uncertainty_dict": "auto",
                                        },
                                        custom={
                                            "fit_algorithm_dict": {
                                                "fit_algorithm": "linear",
                                            },
                                        })

# Runs the forecast
forecaster = Forecaster()
result = forecaster.run_forecast_config(
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=365,  # forecasts 365 steps ahead
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata,
        model_components_param=model_components))

# %%
# Creating model summary
# ^^^^^^^^^^^^^^^^^^^^^^
# Now that we have the output from :py:meth:`~greykite.framework.templates.forecaster.Forecaster.run_forecast_config`,
# we are able to access the model summary.

# Initializes the model summary class.
# ``max_colwidth`` is the maximum length of predictor names that can be displayed.
summary = result.model[-1].summary(max_colwidth=30)

# %%
# The above command creates a model summary class and derives extra information
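
A short follow-up sketch of how the summary object is typically inspected; print shows the full summary table, and get_coef_summary filters coefficients as demonstrated in Example #10 (variable names are those already defined above).

# Prints the full model summary (coefficients, significance, diagnostics).
print(summary)
# Returns only the trend-related coefficients as a DataFrame.
trend_df = summary.get_coef_summary(is_trend=True, return_df=True)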
Example #22
def test_run_template_2():
    """Runs custom template with all options"""
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    daily_event_df_dict = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_pred_cols = get_event_pred_cols(daily_event_df_dict)
    model_components = ModelComponentsParam(
        seasonality={
            "fs_components_df": pd.DataFrame({
                "name": ["tow", "tom", "toq", "toy"],
                "period": [7.0, 1.0, 1.0, 1.0],
                "order": [2, 1, 1, 5],
                "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
            })
        },
        events={
            "daily_event_df_dict": daily_event_df_dict
        },
        changepoints={
            "changepoints_dict": {
                "method": "auto",
                "yearly_seasonality_order": 3,
                "regularization_strength": 0.5,
                "resample_freq": "14D",
                "potential_changepoint_distance": "56D",
                "no_changepoint_proportion_from_end": 0.2
            },
            "seasonality_changepoints_dict": {
                "potential_changepoint_distance": "60D",
                "regularization_strength": 0.5,
                "no_changepoint_proportion_from_end": 0.2
            },
        },
        autoregression=None,
        uncertainty={
            "uncertainty_dict": None,
        },
        custom={
            "origin_for_time_vars": None,
            "extra_pred_cols": [["ct1"] + reg_cols + event_pred_cols],  # growth, regressors, events
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            },
            "min_admissible_value": min(df[VALUE_COL]) - abs(max(df[VALUE_COL])),
            "max_admissible_value": max(df[VALUE_COL]) * 2,
        }
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.692, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.531, rel=1e-2)
        assert result.backtest.test_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.823, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.304, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.921, rel=1e-2)
        assert result.forecast.train_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.897, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #23
def test_run_forecast_config_with_single_simple_silverkite_template():
    # The generic names of single simple silverkite templates are not added to `ModelTemplateEnum`,
    # so we test that they are still recognized.
    data = generate_df_for_tests(freq="D", periods=365)
    df = data["df"]
    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=2)

    model_components = ModelComponentsParam(
        hyperparameter_override=[{
            "estimator__yearly_seasonality": 1
        }, {
            "estimator__yearly_seasonality": 2
        }])
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    single_template_class = SimpleSilverkiteTemplateOptions(
        freq=SILVERKITE_COMPONENT_KEYWORDS.FREQ.value.DAILY,
        seas=SILVERKITE_COMPONENT_KEYWORDS.SEAS.value.NONE)

    forecast_config = ForecastConfig(model_template=[
        single_template_class, "DAILY_ALGO_SGD", "SILVERKITE_DAILY_90"
    ],
                                     computation_param=computation,
                                     coverage=coverage,
                                     evaluation_metric_param=evaluation_metric,
                                     evaluation_period_param=evaluation_period,
                                     forecast_horizon=forecast_horizon,
                                     model_components_param=model_components)

    forecaster = Forecaster()
    result = forecaster.run_forecast_config(df=df, config=forecast_config)

    summary = summarize_grid_search_results(result.grid_search)
    # single_template_class is 1 template,
    # "DAILY_ALGO_SGD" is 1 template and "SILVERKITE_DAILY_90" has 4 templates.
    # With 2 items in `hyperparameter_override`, there should be a total of 12 cases.
    assert summary.shape[0] == 12

    # Tests functionality for single template class only.
    forecast_config = ForecastConfig(model_template=single_template_class,
                                     computation_param=computation,
                                     coverage=coverage,
                                     evaluation_metric_param=evaluation_metric,
                                     evaluation_period_param=evaluation_period,
                                     forecast_horizon=forecast_horizon)

    forecaster = Forecaster()
    pipeline_parameters = forecaster.apply_forecast_config(
        df=df, config=forecast_config)
    assert_equal(actual=pipeline_parameters["hyperparameter_grid"],
                 expected={
                     "estimator__time_properties": [None],
                     "estimator__origin_for_time_vars": [None],
                     "estimator__train_test_thresh": [None],
                     "estimator__training_fraction": [None],
                     "estimator__fit_algorithm_dict": [{
                         "fit_algorithm":
                         "linear",
                         "fit_algorithm_params":
                         None
                     }],
                     "estimator__holidays_to_model_separately": [[]],
                     "estimator__holiday_lookup_countries": [[]],
                     "estimator__holiday_pre_num_days": [0],
                     "estimator__holiday_post_num_days": [0],
                     "estimator__holiday_pre_post_num_dict": [None],
                     "estimator__daily_event_df_dict": [None],
                     "estimator__changepoints_dict": [None],
                     "estimator__seasonality_changepoints_dict": [None],
                     "estimator__yearly_seasonality": [0],
                     "estimator__quarterly_seasonality": [0],
                     "estimator__monthly_seasonality": [0],
                     "estimator__weekly_seasonality": [0],
                     "estimator__daily_seasonality": [0],
                     "estimator__max_daily_seas_interaction_order": [0],
                     "estimator__max_weekly_seas_interaction_order": [2],
                     "estimator__autoreg_dict": [None],
                     "estimator__min_admissible_value": [None],
                     "estimator__max_admissible_value": [None],
                     "estimator__uncertainty_dict": [None],
                     "estimator__growth_term": ["linear"],
                     "estimator__regressor_cols": [[]],
                     "estimator__feature_sets_enabled": [False],
                     "estimator__extra_pred_cols": [[]]
                 },
                 ignore_keys={"estimator__time_properties": None})
Example #24
def test_silverkite_template_custom(model_components_param):
    """"Tests simple_silverkite_template with custom parameters,
    and data that has regressors"""
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300*24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        TIME_COL: time_col,
        VALUE_COL: value_col
    }, axis=1, inplace=True)

    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7
    )
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1
    )
    forecast_horizon = 20
    coverage = 0.7
    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation
        )
    )
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Example #25
def seek_the_oracle(
    df_index,
    series,
    col,
    forecast_length,
    freq,
    prediction_interval=0.9,
    model_template='silverkite',
    growth=None,
    holiday=True,
    holiday_country="UnitedStates",
    regressors=None,
    verbose=0,
    inner_n_jobs=1,
    **kwargs
):
    """Internal. For loop or parallel version of Greykite."""
    inner_df = pd.DataFrame(
        {
            'ts': df_index,
            'y': series,
        }
    )
    if regressors is not None:
        inner_regr = regressors.copy()
        new_names = [
            'rrrr' + str(x) if x in inner_df.columns else str(x)
            for x in inner_regr.columns
        ]
        inner_regr.columns = new_names
        inner_regr.index.name = 'ts'
        inner_regr.reset_index(drop=False, inplace=True)
        inner_df = inner_df.merge(inner_regr, left_on='ts', right_on='ts', how='outer')
    metadata = MetadataParam(
        time_col="ts",  # name of the time column ("date" in example above)
        value_col="y",  # name of the value column ("sessions" in example above)
        freq=freq,  # "H" for hourly, "D" for daily, "W" for weekly, etc.
    )
    # INCLUDE forecast_length lagged mean and std of other features!
    model_template = ModelTemplateEnum.SILVERKITE.name
    forecaster = Forecaster()  # Creates forecasts and stores the result
    if regressors is not None:
        model_components = ModelComponentsParam(
            growth=growth, regressors={"regressor_cols": new_names}
        )
    else:
        model_components = ModelComponentsParam(
            growth=growth,  # 'linear', 'quadratic', 'sqrt'
        )
    computation = ComputationParam(n_jobs=inner_n_jobs, verbose=verbose)
    if holiday:  # also 'auto'
        model_components.events = {
            # These holidays as well as their pre/post dates are modeled as individual events.
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,  # all holidays in "holiday_lookup_countries"
            "holiday_lookup_countries": [
                holiday_country
            ],  # only look up holidays in the United States
            "holiday_pre_num_days": 1,  # also mark the 1 days before a holiday as holiday
            "holiday_post_num_days": 1,  # also mark the 1 days after a holiday as holiday
        }
    config = ForecastConfig(
        model_template=model_template,
        forecast_horizon=forecast_length,
        coverage=prediction_interval,
        model_components_param=model_components,
        metadata_param=metadata,
        computation_param=computation,
    )
    result = forecaster.run_forecast_config(  # result is also stored as `forecaster.forecast_result`.
        df=inner_df,
        config=config,
    )
    res_df = result.forecast.df.tail(forecast_length).drop(columns=['actual'])
    res_df['series_id'] = col
    return res_df
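
A hedged sketch of how seek_the_oracle might be called per series; wide_df (one column per series, indexed by timestamp) and the surrounding loop are hypothetical stand-ins for the for-loop/parallel caller mentioned in the docstring.

# Hypothetical caller: one Greykite forecast per column of `wide_df`.
forecasts = []
for col in wide_df.columns:
    forecasts.append(
        seek_the_oracle(
            df_index=wide_df.index,
            series=wide_df[col],
            col=col,
            forecast_length=14,
            freq="D",
            prediction_interval=0.9,
        )
    )
forecast_df = pd.concat(forecasts, axis=0)  # long format with a `series_id` column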
Example #26
def test_run_template_5():
    """Runs custom template with monthly data, auto-regression and lagged regressors"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols_all = ["regressor1", "regressor2", "regressor_categ"]
    reg_cols = ["regressor1"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols_all
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=reg_cols),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        lagged_regressors={
            "lagged_regressor_dict": [
                {"regressor2": "auto"},
                {"regressor_categ": {"lag_dict": {"orders": [5]}}}
            ]},
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.46, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
        # Checks lagged regressor columns
        actual_pred_cols = set(result.model[-1].model_dict["pred_cols"])
        actual_x_mat_cols = set(result.model[-1].model_dict["x_mat"].columns)
        expected_pred_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5'
        }
        expected_x_mat_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5[T.c2]'
        }
        assert expected_pred_cols.issubset(actual_pred_cols)
        assert expected_x_mat_cols.issubset(actual_x_mat_cols)
Example #27
def valid_configs():
    metadata = MetadataParam(time_col=TIME_COL, value_col=VALUE_COL, freq="D")
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 2 * 7
    coverage = 0.90
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        cv_report_metrics=None,
        agg_periods=7,
        agg_func=np.mean,
        null_model_params=None)
    evaluation_period = EvaluationPeriodParam(test_horizon=2 * 7,
                                              periods_between_train_test=2 * 7,
                                              cv_horizon=1 * 7,
                                              cv_min_train_periods=8 * 7,
                                              cv_expanding_window=True,
                                              cv_periods_between_splits=7,
                                              cv_periods_between_train_test=3 * 7,
                                              cv_max_splits=2)

    silverkite_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": False,
            "weekly_seasonality": True
        },
        growth={"growth_term": "quadratic"},
        events={
            "holidays_to_model_separately":
            SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
            "holiday_lookup_countries": ["UnitedStates"],
            "holiday_pre_num_days": 3,
        },
        changepoints={
            "changepoints_dict": {
                "method": "uniform",
                "n_changepoints": 20,
            }
        },
        regressors={
            "regressor_cols": ["regressor1", "regressor2", "regressor3"]
        },
        uncertainty={
            "uncertainty_dict": "auto",
        },
        hyperparameter_override={"input__response__null__max_frac": 0.1},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "normalize": True
                },
            },
            "feature_sets_enabled": False
        })

    prophet_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": 'additive'
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})

    valid_prophet = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=prophet_components)

    valid_silverkite = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=silverkite_components)

    configs = {
        "valid_prophet": valid_prophet,
        "valid_silverkite": valid_silverkite
    }
    return configs
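A quick note on usage: each entry in the returned dict is a complete `ForecastConfig`, so it can be handed directly to `Forecaster.run_forecast_config` (the same call pattern appears in Example #30 below). A minimal sketch, assuming a daily `df` that contains the time/value columns plus `regressor1`..`regressor3` expected by these configs:

# Sketch only: `df` is assumed to hold the daily series and regressors expected
# by the configs built above.
configs = valid_configs()
forecaster = Forecaster()
result = forecaster.run_forecast_config(df=df, config=configs["valid_silverkite"])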
Example #28
def test_default_forecast_config():
    """Tests an Empty ForecastConfig dataclass"""
    assert_default_forecast_config(ForecastConfig())
Example #29
def test_prophet_template_custom():
    """Tests prophet_template with custom values, with long range input"""
    # prepares input data
    data = generate_df_with_reg_for_tests(freq="H",
                                          periods=300 * 24,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        cst.TIME_COL: time_col,
        cst.VALUE_COL: value_col
    },
              axis=1,
              inplace=True)
    # prepares params and calls template
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [adjustment_size],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": cst.VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": cst.START_DATE_COL,
        "end_date_col": cst.END_DATE_COL,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {
            cst.METRIC_COL: cst.VALUE_COL
        },
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[
            EvaluationMetricEnum.MedianAbsolutePercentError.name
        ],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(test_horizon=1,
                                              periods_between_train_test=2,
                                              cv_horizon=3,
                                              cv_min_train_periods=4,
                                              cv_expanding_window=True,
                                              cv_periods_between_splits=5,
                                              cv_periods_between_train_test=6,
                                              cv_max_splits=7)
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
            "daily_seasonality": [4],
            "add_seasonality_dict": [{
                "yearly": {
                    "period": 365.25,
                    "fourier_order": 20,
                    "prior_scale": 20.0
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": 15
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": 35,
                    "prior_scale": 30.0
                }
            }]
        },
        growth={"growth_term": "linear"},
        events={
            "holiday_lookup_countries":
            ["UnitedStates", "UnitedKingdom", "India"],
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [3],
            "holidays_prior_scale": [5.0]
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10.0,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 20.0,
                    "mode": 'multiplicative'
                },
            }]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [50],
            "changepoint_range": [0.9]
        },
        uncertainty={
            "mcmc_samples": [500],
            "uncertainty_samples": [2000]
        },
        hyperparameter_override={
            "input__response__null__impute_algorithm":
            "ts_interpolate",
            "input__response__null__impute_params": {
                "orders": [7, 14]
            },
            "input__regressors_numeric__normalize__normalize_algorithm":
            "RobustScaler",
        })
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(model_template=ModelTemplateEnum.PROPHET.name,
                            metadata_param=metadata,
                            forecast_horizon=forecast_horizon,
                            coverage=coverage,
                            evaluation_metric_param=evaluation_metric,
                            evaluation_period_param=evaluation_period,
                            model_components_param=model_components,
                            computation_param=computation)
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    pipeline = params.pop("pipeline", None)
    # Adding start_year and end_year based on the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Example #30
# Create a forecast
# Specifies dataset information
metadata = MetadataParam(
    time_col="ts",  # name of the time column
    value_col="y",  # name of the value column
    freq="D"  # "D" for daily, "H" for hourly, "W" for weekly, "MS" for monthly at month start, etc.
)

forecaster = Forecaster()
result = forecaster.run_forecast_config(
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=1000,  # forecasts 1000 steps ahead
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata))

ts = result.timeseries
fig = ts.plot()
plotly.io.show(fig)
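
# The fitted forecast (with prediction intervals) can be plotted the same way;
# `result.forecast` is the UnivariateForecast held by the ForecastResult.
fig = result.forecast.plot()
plotly.io.show(fig)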

# Cross-validation
grid_search = result.grid_search
cv_results = summarize_grid_search_results(
    grid_search=grid_search,
    decimals=2,
    # The below saves space in the printed output. Remove to show all available metrics and columns.
    cv_report_metrics=None,
    column_order=[