Ejemplo n.º 1
0
def test_get_regressor_cols():
    """Tests `SilverkiteTemplate.get_regressor_cols`.

    Walks through four configurations: the default config, an empty
    ``custom`` dict, a flat ``extra_pred_cols`` list and a nested one.
    The template's ``df`` only has columns "p1" and "p2", and in both
    populated cases exactly those two names are expected back.
    """
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.df = pd.DataFrame(columns=["p1", "p2"])

    # Default model components
    assert template.get_regressor_cols() is None

    # Empty `custom` dict
    template.config.model_components_param = ModelComponentsParam(custom={})
    assert template.get_regressor_cols() is None

    # Flat list that also contains "p3" and the time column,
    # neither of which is expected in the result
    flat_cols = ["p1", "p2", "p3", template.config.metadata_param.time_col]
    template.config.model_components_param = ModelComponentsParam(
        custom={"extra_pred_cols": flat_cols})
    assert set(template.get_regressor_cols()) == {"p1", "p2"}

    # Nested list containing `None` and an empty list
    nested_cols = [["p1"], ["p2", "p3"], None, []]
    template.config.model_components_param = ModelComponentsParam(
        custom={"extra_pred_cols": nested_cols})
    assert set(template.get_regressor_cols()) == {"p1", "p2"}
Ejemplo n.º 2
0
def test_get_extra_pred_cols():
    """Tests `get_extra_pred_cols`.

    ``None`` and an empty ``custom`` dict are expected to give ``None``.
    A flat list and a nested list (with ``None`` and ``[]`` entries) are
    both expected to give the names "p1", "p2", "p3".
    """
    # Nothing specified
    assert get_extra_pred_cols(model_components=None) is None

    empty_custom = ModelComponentsParam(custom={})
    assert get_extra_pred_cols(model_components=empty_custom) is None

    # Flat and nested specifications resolve to the same set of names
    specifications = (
        ["p1", "p2", "p3"],
        [["p1"], ["p2", "p3"], None, []],
    )
    for specification in specifications:
        components = ModelComponentsParam(
            custom={"extra_pred_cols": specification})
        resolved = get_extra_pred_cols(model_components=components)
        assert set(resolved) == {"p1", "p2", "p3"}
Ejemplo n.º 3
0
def test_get_config_with_default_model_template_and_components():
    """Tests `__get_config_with_default_model_template_and_components`

    The private method is reached through its name-mangled attribute
    ``_Forecaster__get_config_with_default_model_template_and_components``.
    """
    # No config, default forecaster
    default_forecaster = Forecaster()
    resolved = default_forecaster._Forecaster__get_config_with_default_model_template_and_components()
    expected = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        model_components_param=ModelComponentsParam())
    assert resolved == expected

    # Overrides `default_model_template_name`, unnests `model_components_param`.
    sk_forecaster = Forecaster(default_model_template_name="SK")
    nested_config = ForecastConfig(
        model_components_param=[ModelComponentsParam()])
    resolved = sk_forecaster._Forecaster__get_config_with_default_model_template_and_components(
        nested_config)
    expected = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        model_components_param=ModelComponentsParam())
    assert resolved == expected

    # Overrides `model_template_enum` and `default_model_template_name`
    custom_forecaster = Forecaster(
        model_template_enum=MyModelTemplateEnum,
        default_model_template_name="MYSILVERKITE")
    resolved = custom_forecaster._Forecaster__get_config_with_default_model_template_and_components()
    expected = ForecastConfig(
        model_template=MyModelTemplateEnum.MYSILVERKITE.name,
        model_components_param=ModelComponentsParam())
    assert resolved == expected
Ejemplo n.º 4
0
def test_prophet_hyperparameter_grid_exception():
    """Tests prophet_hyperparameter_grid exceptions

    The template is configured outside the ``pytest.raises`` context so
    that only the `get_hyperparameter_grid` call can satisfy the expected
    ``ValueError``. An error raised while building the configuration
    fails the test instead of being mistaken for the expected one.
    """
    # unknown argument
    model_components = ModelComponentsParam(
        seasonality={"unknown_seasonality": ["additive"]})
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = model_components
    with pytest.raises(ValueError, match=r"Unexpected key\(s\) found"):
        template.get_hyperparameter_grid()

    # regressor must be specified under `add_regressor_dict`, not directly
    model_components = ModelComponentsParam(
        regressors={
            "regressor1": {
                "prior_scale": 10,
                "standardize": True,
                "mode": "additive"
            },
            "regressor2": {
                "prior_scale": 15,
                "standardize": False,
                "mode": "additive"
            },
            "regressor3": {}
        })
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = model_components
    with pytest.raises(ValueError, match=r"Unexpected key\(s\) found"):
        template.get_hyperparameter_grid()
def test_apply_model_components_defaults():
    """Tests apply_model_components_defaults

    Covers ``None``, a single object, a one-element list (unpacked to the
    element) and a two-element list containing ``None``.
    """
    def apply_defaults(value):
        # A fresh `ForecastConfigDefaults` is used for every call.
        return ForecastConfigDefaults().apply_model_components_defaults(value)

    # `None` becomes a default object
    assert apply_defaults(None) == ModelComponentsParam()

    mcp = ModelComponentsParam({"growth": "growth"})
    # A single object is returned as is
    assert apply_defaults(mcp) == mcp
    # A list of one element is unpacked
    assert apply_defaults([mcp]) == mcp
    # `None` inside a longer list is replaced, the list is kept
    assert apply_defaults([None, mcp]) == [ModelComponentsParam(), mcp]
def test_get_regressor_cols():
    """Tests `AutoArimaTemplate.get_regressor_cols` when no regressors are given.

    Both the default `ModelComponentsParam` and one with an empty
    ``regressors`` dict are expected to give ``None``.
    """
    template = AutoArimaTemplate()
    # no regressors: default components, then an empty `regressors` dict
    for component_kwargs in ({}, {"regressors": {}}):
        components = ModelComponentsParam(**component_kwargs)
        template.config = ForecastConfig(model_components_param=components)
        assert template.get_regressor_cols() is None
Ejemplo n.º 7
0
def test_get_lagged_regressor_info():
    """Tests `SilverkiteTemplate.get_lagged_regressor_info`.

    With the default config every entry of the returned dict is expected
    to be ``None``. With two lagged regressor dicts, both column names are
    expected, along with the smallest (1) and largest (7 * 3 = 21) lag
    order found across the specification.
    """
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()

    # Without lagged regressors
    info_keys = (
        "lagged_regressor_cols",
        "overall_min_lag_order",
        "overall_max_lag_order",
    )
    assert template.get_lagged_regressor_info() == dict.fromkeys(info_keys)

    # With lagged regressors
    regressor2_spec = {
        "lag_dict": {"orders": [5]},
        "agg_lag_dict": {
            "orders_list": [[7, 7 * 2, 7 * 3]],
            "interval_list": [(8, 7 * 2)]},
        "series_na_fill_func": lambda s: s.bfill().ffill()}
    regressor_bool_spec = {
        "lag_dict": {"orders": [1]},
        "agg_lag_dict": {
            "orders_list": [[7, 7 * 2]],
            "interval_list": [(8, 7 * 2)]},
        "series_na_fill_func": lambda s: s.bfill().ffill()}
    template.config.model_components_param = ModelComponentsParam(
        lagged_regressors={
            "lagged_regressor_dict": [
                {"regressor2": regressor2_spec},
                {"regressor_bool": regressor_bool_spec},
            ]
        })

    info = template.get_lagged_regressor_info()
    assert set(info["lagged_regressor_cols"]) == {"regressor2", "regressor_bool"}
    assert info["overall_min_lag_order"] == 1
    assert info["overall_max_lag_order"] == 21
Ejemplo n.º 8
0
def model_components_param(silverkite, silverkite_diagnostics):
    """Builds a `ModelComponentsParam` around the given Silverkite objects.

    Parameters
    ----------
    silverkite
        Stored unchanged under ``custom["silverkite"]``.
    silverkite_diagnostics
        Stored unchanged under ``custom["silverkite_diagnostics"]``.

    Returns
    -------
    ModelComponentsParam
        Components with seasonality, changepoints, uncertainty and custom
        settings filled in; ``events`` is ``None``.
    """
    seasonality = {"fs_components_df": None}
    changepoints = {
        "changepoints_dict": {
            "method": "uniform",
            "n_changepoints": 20,
        }
    }
    # Only the method is specified and the "params" entry is deliberately
    # left out: `quantiles` is not provided, requires `config.coverage` to be set
    uncertainty = {
        "uncertainty_dict": {
            "uncertainty_method": "simple_conditional_residuals",
        }
    }
    custom = {
        "silverkite": silverkite,
        "silverkite_diagnostics": silverkite_diagnostics,
        "extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "max_admissible_value": 4,
    }
    return ModelComponentsParam(
        seasonality=seasonality,
        events=None,
        changepoints=changepoints,
        uncertainty=uncertainty,
        custom=custom,
    )
Ejemplo n.º 9
0
    def apply_model_components_defaults(model_components: Optional[Union[ModelComponentsParam, List[Optional[ModelComponentsParam]]]] = None) \
            -> Union[ModelComponentsParam, List[ModelComponentsParam]]:
        """Fills in default ModelComponentsParam values for the given input.

        Every ``None`` (the input itself, or an item of an input list) is
        replaced by a new ``ModelComponentsParam()``. A result holding exactly
        one item is returned as that item rather than as a list.

        Parameters
        ----------
        model_components : `~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None or list of such items
            The ModelComponentsParam object(s).

        Returns
        -------
        model_components : `~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or list of such items
            The single resulting object, or a new list when the input list
            does not have exactly one item.
        """
        # Normalizes the input to a list so both forms share one code path
        if isinstance(model_components, list):
            candidates = model_components
        else:
            candidates = [model_components]
        # Swaps every `None` for a default object
        filled = []
        for candidate in candidates:
            filled.append(ModelComponentsParam() if candidate is None else candidate)
        # A single item is unpacked, because some template classes
        # (like SilverkiteTemplate) do not allow a list here.
        if len(filled) == 1:
            return filled[0]
        return filled
Ejemplo n.º 10
0
def test_prophet_hyperparameter_grid_seasonality_growth(default_holidays):
    """Tests get_hyperparameter_grid for basic seasonality, growth and other default params

    Only yearly/weekly seasonality and the growth term are set explicitly;
    every other grid entry is compared against the listed default.
    """
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
        },
        growth={"growth_term": ["linear"]})

    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = model_components
    actual_grid = template.get_hyperparameter_grid()

    # Expected values, keyed without the "estimator__" prefix
    expected_params = {
        "growth": ["linear"],
        "seasonality_mode": ["additive"],
        "seasonality_prior_scale": [10.0],
        "yearly_seasonality": [True],
        "weekly_seasonality": [False],
        "daily_seasonality": ["auto"],
        "add_seasonality_dict": [None],
        "holidays": [default_holidays],
        "holidays_prior_scale": [10.0],
        "changepoint_prior_scale": [0.05],
        "changepoints": [None],
        "n_changepoints": [25],
        "changepoint_range": [0.8],
        "mcmc_samples": [0],
        "uncertainty_samples": [1000],
        "add_regressor_dict": [None],
    }
    expected_grid = {
        f"estimator__{name}": values
        for name, values in expected_params.items()
    }
    assert_equal(actual=actual_grid, expected=expected_grid)
Ejemplo n.º 11
0
def df_config():
    """Builds a weekly dataset with regressors and a matching forecast config.

    Returns
    -------
    dict
        ``"df"``: the generated data restricted to the time, value and
        regressor columns; ``"config"``: a `ForecastConfig` for the
        "SILVERKITE" template; ``"model_template"``: the template name;
        ``"reg_cols"``: the regressor column names.
    """
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    generated = generate_df_with_reg_for_tests(
        freq="W-MON",
        periods=140,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = generated["df"][[TIME_COL, VALUE_COL] + reg_cols]

    model_template = "SILVERKITE"
    metric_param = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5,
        })
    period_param = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=3)
    components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2},
            }
        })
    computation = ComputationParam(verbose=2)
    config = ForecastConfig(
        model_template=model_template,
        computation_param=computation,
        coverage=0.90,
        evaluation_metric_param=metric_param,
        evaluation_period_param=period_param,
        forecast_horizon=27,
        model_components_param=components)
    return {
        "df": df,
        "config": config,
        "model_template": model_template,
        "reg_cols": reg_cols,
    }
Ejemplo n.º 12
0
def test_prophet_hyperparameter_grid_events():
    """Tests get_hyperparameter_grid for holidays of selected countries.

    The expected holidays are built with `template.get_prophet_holidays`
    over the years ``start_year - 1`` through ``end_year + 1``.
    """
    # holiday params
    start_year, end_year = 2018, 2022
    pre_num_days = [1]
    post_num_days = [1]
    countries = ["UnitedStates", "China", "India"]
    prior_scales = [5.0, 10.0, 15.0]

    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = ModelComponentsParam(
        events={
            "holiday_lookup_countries": countries,
            "holiday_pre_num_days": pre_num_days,
            "holiday_post_num_days": post_num_days,
            "start_year": start_year,
            "end_year": end_year,
            "holidays_prior_scale": prior_scales,
        })
    actual_grid = template.get_hyperparameter_grid()

    # Holidays df, based on given holidays params
    expected_holidays = template.get_prophet_holidays(
        year_list=list(range(start_year - 1, end_year + 2)),
        countries=countries,
        lower_window=-pre_num_days[0],
        upper_window=post_num_days[0])

    # Expected values, keyed without the "estimator__" prefix
    expected_params = {
        "growth": ["linear"],
        "seasonality_mode": ["additive"],
        "seasonality_prior_scale": [10.0],
        "yearly_seasonality": ["auto"],
        "weekly_seasonality": ["auto"],
        "daily_seasonality": ["auto"],
        "add_seasonality_dict": [None],
        "holidays": [expected_holidays],
        "holidays_prior_scale": [5.0, 10.0, 15.0],
        "changepoint_prior_scale": [0.05],
        "changepoints": [None],
        "n_changepoints": [25],
        "changepoint_range": [0.8],
        "mcmc_samples": [0],
        "uncertainty_samples": [1000],
        "add_regressor_dict": [None],
    }
    expected_grid = {
        f"estimator__{name}": values
        for name, values in expected_params.items()
    }
    assert_equal(actual=actual_grid, expected=expected_grid)
Ejemplo n.º 13
0
def test_forecast_config():
    """Tests ForecastConfig dataclass

    Builds a config with every parameter group populated and hands it to
    `assert_forecast_config`, then checks that a config built with
    `CV_REPORT_METRICS_ALL` gives a truthy `to_dict()` result.
    """
    metadata = MetadataParam(
        time_col="custom_time_col",
        anomaly_info=[{"key": "value"}, {"key2": "value2"}])
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_min_train_periods=20)
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name,
        cv_report_metrics=[
            EvaluationMetricEnum.MeanAbsoluteError.name,
            EvaluationMetricEnum.MeanAbsolutePercentError.name,
        ],
        relative_error_tolerance=0.02)
    model_components = ModelComponentsParam(
        autoregression={"autoreg_dict": {"autoreg_param": 0}},
        changepoints=None,
        custom={"custom_param": 1},
        growth={"growth_param": 2},
        events={"events_param": 3},
        hyperparameter_override=[{"h1": 4}, {"h2": 5}, None],
        regressors={"names": ["regressor1", "regressor2"]},
        lagged_regressors={"lagged_regressor_dict": {"lag_reg_param": 0}},
        seasonality={"seas_param": 6},
        uncertainty={"uncertainty_param": 7})
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=metadata,
        evaluation_period_param=evaluation_period,
        evaluation_metric_param=evaluation_metric,
        model_components_param=model_components,
        computation_param=ComputationParam(n_jobs=None))
    assert_forecast_config(config)

    # Tests a string passed to `cv_report_metrics`
    all_metrics_param = EvaluationMetricParam(
        cv_report_metrics=CV_REPORT_METRICS_ALL)
    assert ForecastConfig(evaluation_metric_param=all_metrics_param).to_dict()
Ejemplo n.º 14
0
def test_hyperparameter_override(default_holidays):
    """Tests the hyperparameter_override functionality.

    Two overrides are supplied, so two grids are expected: the base grid
    with the first override applied, followed by the unchanged base grid
    (the second override is empty).
    """
    overrides = [
        {
            "input__response__null__max_frac": 0.1,
            "estimator__yearly_seasonality": True,
            "estimator__growth": ["logistic"],
        },
        {},
    ]
    template = ProphetTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.config.model_components_param = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True, False],
            "weekly_seasonality": False,
        },
        hyperparameter_override=overrides)
    actual_grids = template.get_hyperparameter_grid()

    # Grid expected when no override applies
    base_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": [True, False],
        "estimator__weekly_seasonality": [False],
        "estimator__daily_seasonality": ["auto"],
        "estimator__add_seasonality_dict": [None],
        "estimator__holidays": [default_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [None],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [None],
    }
    # Grid expected after the first override; values are wrapped in lists
    overridden_grid = dict(base_grid)
    overridden_grid.update({
        "input__response__null__max_frac": [0.1],
        "estimator__yearly_seasonality": [True],
        "estimator__growth": ["logistic"],
    })
    assert_equal(actual_grids, [overridden_grid, base_grid])
Ejemplo n.º 15
0
def test_run_template_4():
    """Runs custom template with monthly data and auto-regression

    Checks the backtest RMSE against a reference value (4.95, within 10%)
    and then runs the shared forecast pipeline result checks.
    """
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    generated = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = generated["df"][[TIME_COL, VALUE_COL] + reg_cols]
    # The forecast horizon is the number of rows in the test split
    forecast_horizon = generated["test_df"].shape[0]

    components = ModelComponentsParam(
        custom={
            "fit_algorithm_dict": {"fit_algorithm": "linear"},
            "extra_pred_cols": ["ct2"],
        },
        autoregression={"autoreg_dict": {"lag_dict": {"orders": [1]}}},
        uncertainty={"uncertainty_dict": None})
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=components,
    )

    # Warnings are silenced for the run and for the checks that follow
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(df=df, config=config)
        rmse_name = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        backtest_rmse = result.backtest.test_evaluation[rmse_name]
        assert backtest_rmse == pytest.approx(4.95, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Ejemplo n.º 16
0
def test_estimator_plot_components_from_forecaster():
    """Tests estimator's plot_components function after the Forecaster has set everything up at the top most level

    The estimator is taken from the last step of the fitted pipeline.
    """
    # Test with real data (Female-births) via model template
    loader = DataLoader()
    births_df = loader.get_df(
        data_path=loader.get_data_home(data_sub_dir="daily"),
        data_name="daily_female_births")

    seasonality = {
        "yearly_seasonality": True,
        "quarterly_seasonality": True,
        "weekly_seasonality": True,
        "daily_seasonality": False,
    }
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=30,  # forecast 1 month
        coverage=0.95,  # 95% prediction intervals
        metadata_param=MetadataParam(
            time_col="Date", value_col="Births", freq="D"),
        model_components_param=ModelComponentsParam(seasonality=seasonality))
    result = Forecaster().run_forecast_config(df=births_df, config=config)

    # Last pipeline step is a (name, estimator) pair
    _, estimator = result.model.steps[-1][-2:]
    assert estimator.plot_components()
Ejemplo n.º 17
0
def test_get_regressor_cols():
    """Tests `ProphetTemplate.get_regressor_cols`.

    Covers ``add_regressor_dict`` given as a list of dicts (with a ``None``
    entry and a regressor repeated across dicts), as a single dict, and
    several specifications for which ``None`` is expected.
    """
    def additive(prior_scale, standardize):
        # Fresh settings dict for one additive regressor.
        return {
            "prior_scale": prior_scale,
            "standardize": standardize,
            "mode": "additive"
        }

    template = ProphetTemplate()

    # `add_regressor_dict` is a list of dict
    first_dict = {
        "regressor1": additive(10, True),
        "regressor2": additive(15, False),
        "regressor3": {}
    }
    second_dict = {
        "regressor1": additive(10, True),
        "regressor4": additive(15, False),
        "regressor5": {}
    }
    components = ModelComponentsParam(
        regressors={"add_regressor_dict": [first_dict, None, second_dict]})
    template.config = ForecastConfig(model_components_param=components)
    assert set(template.get_regressor_cols()) == {
        "regressor1", "regressor2", "regressor3", "regressor4", "regressor5"
    }

    # `add_regressor_dict` is a single dict
    single_dict = {
        "regressor1": additive(10, True),
        "regressor2": additive(15, False),
        "regressor3": {}
    }
    components = ModelComponentsParam(
        regressors={"add_regressor_dict": single_dict})
    template.config = ForecastConfig(model_components_param=components)
    assert set(template.get_regressor_cols()) == {
        "regressor1", "regressor2", "regressor3"
    }

    # no regressors: default components, an empty `regressors` dict,
    # an empty list, and a list holding only an empty dict and `None`
    no_regressor_kwargs = (
        {},
        {"regressors": {}},
        {"regressors": {"add_regressor_dict": []}},
        {"regressors": {"add_regressor_dict": [{}, None]}},
    )
    for component_kwargs in no_regressor_kwargs:
        components = ModelComponentsParam(**component_kwargs)
        template.config = ForecastConfig(model_components_param=components)
        assert template.get_regressor_cols() is None
Ejemplo n.º 18
0
def test_run_prophet_template_custom():
    """Tests running prophet template through the pipeline

    Fits the PROPHET template on a small daily dataset with three
    regressors, then checks the forecast columns, the evaluation metrics
    against reference values, and the regressor settings on the fitted model.
    """
    generated = generate_df_with_reg_for_tests(
        freq="D",
        periods=50,
        train_frac=0.8,
        conti_year_origin=2018,
        remove_extra_cols=True,
        mask_test_actuals=True)
    # select relevant columns for testing
    df = generated["df"][[
        cst.TIME_COL, cst.VALUE_COL, "regressor1", "regressor2", "regressor3"
    ]]
    forecast_horizon = generated["fut_time_num"]

    # Model components - custom holidays; other params as defaults
    seasonality = {
        "seasonality_mode": ["additive"],
        "yearly_seasonality": ["auto"],
        "weekly_seasonality": [True],
        "daily_seasonality": ["auto"],
    }
    events = {
        "holiday_pre_num_days": [1],
        "holiday_post_num_days": [1],
        "holidays_prior_scale": [1.0]
    }
    changepoints = {
        "changepoint_prior_scale": [0.05],
        "n_changepoints": [1],
        "changepoint_range": [0.5],
    }
    regressor_settings = {
        "regressor1": {
            "prior_scale": 10,
            "standardize": True,
            "mode": "additive"
        },
        "regressor2": {
            "prior_scale": 15,
            "standardize": False,
            "mode": "additive"
        },
        "regressor3": {}
    }
    model_components = ModelComponentsParam(
        seasonality=seasonality,
        growth={"growth_term": ["linear"]},
        events=events,
        changepoints=changepoints,
        regressors={"add_regressor_dict": [regressor_settings]},
        uncertainty={"uncertainty_samples": [10]})

    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=MetadataParam(
            time_col=cst.TIME_COL,
            value_col=cst.VALUE_COL,
            freq="D",
        ),
        forecast_horizon=forecast_horizon,
        coverage=0.95,
        model_components_param=model_components,
        evaluation_period_param=EvaluationPeriodParam(
            test_horizon=5,  # speeds up test case
            periods_between_train_test=5,
            cv_horizon=0,  # speeds up test case
        ),
    )
    result = Forecaster().run_forecast_config(df=df, config=config)

    forecast_df = result.forecast.df_test.reset_index(drop=True)
    assert list(forecast_df.columns) == [
        "ts", "actual", "forecast", "forecast_lower", "forecast_upper"
    ]

    backtest = result.backtest
    assert backtest.coverage == 0.95, "coverage is not correct"
    # NB: coverage is poor because of very small dataset size and low uncertainty_samples
    band_coverage = cst.PREDICTION_BAND_COVERAGE
    assert backtest.train_evaluation[band_coverage] == pytest.approx(0.677, rel=1e-3), \
        "training coverage is None or less than expected"
    assert backtest.test_evaluation[band_coverage] == pytest.approx(0.800, rel=1e-3), \
        "testing coverage is None or less than expected"
    assert backtest.train_evaluation["MSE"] == pytest.approx(3.7849, rel=1e-3), \
        "training MSE is None or more than expected"
    assert backtest.test_evaluation["MSE"] == pytest.approx(2.9609, rel=1e-3), \
        "testing MSE is None or more than expected"

    forecast_evaluation = result.forecast.train_evaluation
    assert forecast_evaluation[band_coverage] == pytest.approx(0.7805, rel=1e-3), \
        "forecast coverage is None or less than expected"
    assert forecast_evaluation["MSE"] == pytest.approx(4.1806, rel=1e-3), \
        "forecast MSE is None or more than expected"

    # ensure regressors were used in the model
    fitted_regressors = result.model.steps[-1][-1].model.extra_regressors
    assert fitted_regressors.keys() == {"regressor1", "regressor2", "regressor3"}
    first, second, third = (
        fitted_regressors[name]
        for name in ("regressor1", "regressor2", "regressor3"))
    assert first["prior_scale"] == 10.0
    assert first["standardize"] is True
    assert first["mode"] == "additive"
    assert second["prior_scale"] == 15.0
    # `standardize` was not specified for regressor3; "auto" is expected
    assert third["standardize"] == "auto"
Ejemplo n.º 19
0
def seek_the_oracle(
    df_index,
    series,
    col,
    forecast_length,
    freq,
    prediction_interval=0.9,
    model_template='silverkite',
    growth=None,
    holiday=True,
    holiday_country="UnitedStates",
    regressors=None,
    verbose=0,
    inner_n_jobs=1,
    **kwargs
):
    """Internal. Runs a Greykite forecast for a single series.

    Parameters
    ----------
    df_index
        Values used for the "ts" (time) column.
    series
        Values used for the "y" (value) column.
    col
        Identifier written to the ``series_id`` column of the result.
    forecast_length
        Forecast horizon; also the number of trailing rows returned.
    freq
        Frequency passed to `MetadataParam`.
    prediction_interval
        Passed to `ForecastConfig` as ``coverage``.
    model_template
        NOTE(review): not consulted; the SILVERKITE template is always
        used. Confirm whether this is intended.
    growth
        Passed to `ModelComponentsParam` as ``growth``.
    holiday
        When truthy, holiday events are configured for ``holiday_country``.
    holiday_country
        The single entry of ``holiday_lookup_countries``.
    regressors
        Optional DataFrame, outer-merged onto the series using its index
        as "ts". Columns whose name clashes with "ts" or "y" are prefixed
        with "rrrr".
    verbose, inner_n_jobs
        Passed to `ComputationParam` as ``verbose`` and ``n_jobs``.
    **kwargs
        Accepted but not used in this function.

    Returns
    -------
    The last ``forecast_length`` rows of the forecast DataFrame, without
    the "actual" column and with a ``series_id`` column set to ``col``.
    """
    use_regressors = regressors is not None

    frame = pd.DataFrame({'ts': df_index, 'y': series})
    component_kwargs = {"growth": growth}  # 'linear', 'quadratic', 'sqrt'
    if use_regressors:
        regr = regressors.copy()
        # Prefix any regressor name that would clash with 'ts' or 'y'
        new_names = []
        for name in regr.columns:
            if name in frame.columns:
                new_names.append('rrrr' + str(name))
            else:
                new_names.append(str(name))
        regr.columns = new_names
        regr.index.name = 'ts'
        regr = regr.reset_index(drop=False)
        frame = frame.merge(regr, left_on='ts', right_on='ts', how='outer')
        component_kwargs["regressors"] = {"regressor_cols": new_names}

    metadata = MetadataParam(
        time_col="ts",  # name of the time column
        value_col="y",  # name of the value column
        freq=freq,  # "H" for hourly, "D" for daily, "W" for weekly, etc.
    )
    # INCLUDE forecast_length lagged mean and std of other features!
    # The caller-supplied `model_template` is replaced here.
    model_template = ModelTemplateEnum.SILVERKITE.name
    forecaster = Forecaster()  # Creates forecasts and stores the result
    model_components = ModelComponentsParam(**component_kwargs)
    computation = ComputationParam(n_jobs=inner_n_jobs, verbose=verbose)
    if holiday:  # also 'auto'
        # These holidays as well as their pre/post dates are modeled as individual events.
        model_components.events = {
            # all holidays in "holiday_lookup_countries"
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
            # only look up holidays in the requested country
            "holiday_lookup_countries": [holiday_country],
            # also mark the 1 day before a holiday as holiday
            "holiday_pre_num_days": 1,
            # also mark the 1 day after a holiday as holiday
            "holiday_post_num_days": 1,
        }

    config = ForecastConfig(
        model_template=model_template,
        forecast_horizon=forecast_length,
        coverage=prediction_interval,
        model_components_param=model_components,
        metadata_param=metadata,
        computation_param=computation,
    )
    # result is also stored as `forecaster.forecast_result`.
    result = forecaster.run_forecast_config(df=frame, config=config)

    forecast_tail = result.forecast.df.tail(forecast_length)
    res_df = forecast_tail.drop(columns=['actual'])
    res_df['series_id'] = col
    return res_df
Ejemplo n.º 20
0
def test_prophet_hyperparameter_grid_changepoints_uncertainty_custom(
        default_holidays):
    """Tests get_prophet_hyperparameter_grid when changepoints, regressors
    and uncertainty are customized.

    The user-provided candidate lists must show up unchanged under the
    matching ``estimator__`` keys; all remaining keys must hold the values
    listed in the expected grid below.
    """
    def make_regressor_options():
        # Builds new objects on every call so that the input passed to the
        # template and the expected grid never share state.
        return [
            {
                "reg_col1": {
                    "prior_scale": 10.0,
                    "standardize": False,
                    "mode": "additive",
                },
                "reg_col2": {
                    "prior_scale": 20.0,
                    "standardize": True,
                    "mode": "multiplicative",
                },
            },
            {
                "reg_col1": {
                    "prior_scale": 20.0,
                    "standardize": True,
                    "mode": "additive",
                },
                "reg_col2": {
                    "prior_scale": 40.0,
                    "standardize": False,
                    "mode": "multiplicative",
                },
            },
        ]

    # User-provided components
    components = ModelComponentsParam(
        changepoints={
            "changepoint_prior_scale": [0.05, 1.0, 5.0, 10.0, 15.0],
            "changepoints": [None, ["2018-10-11", "2018-11-11", "2019-01-17"]],
            "n_changepoints": [25, 50, 100],
            "changepoint_range": [0.8, 0.9],
        },
        regressors={"add_regressor_dict": make_regressor_options()},
        uncertainty={
            "mcmc_samples": [0, 1000],
            "uncertainty_samples": [1000, 2000],
        })

    prophet_template = ProphetTemplate()
    prophet_template.config = prophet_template.apply_forecast_config_defaults()
    prophet_template.config.model_components_param = components
    actual_grid = prophet_template.get_hyperparameter_grid()

    # Expected values: untouched components first, then the customized ones.
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": ["auto"],
        "estimator__weekly_seasonality": ["auto"],
        "estimator__daily_seasonality": ["auto"],
        "estimator__add_seasonality_dict": [None],
        "estimator__holidays": [default_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05, 1.0, 5.0, 10.0, 15.0],
        "estimator__changepoints": [
            None,
            ["2018-10-11", "2018-11-11", "2019-01-17"],
        ],
        "estimator__n_changepoints": [25, 50, 100],
        "estimator__changepoint_range": [0.8, 0.9],
        "estimator__mcmc_samples": [0, 1000],
        "estimator__uncertainty_samples": [1000, 2000],
        "estimator__add_regressor_dict": make_regressor_options(),
    }
    assert_equal(actual=actual_grid, expected=expected_grid)
Ejemplo n.º 21
0
    train_end_date=datetime.datetime(2016, 1, 20))

# %%
# Next we create a ``ModelComponentsParam`` instance based on the discussion of relevant features.
# ``ModelComponentsParam`` includes properties related to the model itself.

model_components = ModelComponentsParam(
    seasonality=seasonality,
    growth=growth,
    events=events,
    changepoints=changepoints,
    autoregression=None,
    regressors=regressors,  # is_football_season defined above
    uncertainty=dict(uncertainty_dict="auto"),
    custom=dict(
        # The algorithm used to learn the relationship between the time series
        # and the features. Regularized fitting algorithms are recommended to
        # mitigate high correlations and over-fitting; "ridge" is a good
        # choice when unsure.
        fit_algorithm_dict=dict(fit_algorithm="ridge"),
        # The interaction between is_football_season and weekly seasonality
        # defined above.
        extra_pred_cols=extra_pred_cols,
    ),
)

# %%
# Now let's run the model with the new configuration.
# The evaluation config is kept the same as the previous case;
# this is important for a fair comparison of parameter sets.
Ejemplo n.º 22
0
def test_apply_default_model_components(model_components_param, silverkite, silverkite_diagnostics):
    """Tests ``apply_default_model_components``.

    Scenarios covered, in order:

        1. No arguments: every component holds the expected default.
        2. User-provided components plus ``time_properties``: provided values
           are combined with defaults and the input is not mutated.
        3. Empty ``time_properties``: the time origin is ``[None]``.
        4. A custom autoregression dictionary is passed through.
    """
    # 1. No arguments
    defaults = apply_default_model_components()
    expected_fs_components_df = pd.DataFrame({
        "name": ["tod", "tow", "tom", "toq", "toy"],
        "period": [24.0, 7.0, 1.0, 1.0, 1.0],
        "order": [3, 3, 1, 1, 5],
        "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})
    assert_equal(
        defaults.seasonality,
        {"fs_components_df": [expected_fs_components_df]})
    assert defaults.growth == {}
    assert defaults.events == {"daily_event_df_dict": [None]}
    assert defaults.changepoints == {
        "changepoints_dict": [None],
        "seasonality_changepoints_dict": [None],
    }
    assert defaults.autoregression == {"autoreg_dict": [None]}
    assert defaults.regressors == {}
    assert defaults.uncertainty == {"uncertainty_dict": [None]}

    expected_default_custom = {
        "silverkite": [SilverkiteForecast()],
        "silverkite_diagnostics": [SilverkiteDiagnostics()],
        "origin_for_time_vars": [None],
        "extra_pred_cols": ["ct1"],  # linear growth
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
        "max_admissible_value": [None],
    }
    # The two object-valued entries are excluded here and checked separately below.
    assert_equal(
        defaults.custom,
        expected_default_custom,
        ignore_keys={
            "silverkite": None,
            "silverkite_diagnostics": None
        })
    # The defaults must not be the fixture instances.
    assert defaults.custom["silverkite"][0] != silverkite
    assert defaults.custom["silverkite_diagnostics"][0] != silverkite_diagnostics

    # 2. Overwrites some parameters
    time_properties = {"origin_for_time_vars": 2020}
    snapshot = dataclasses.replace(model_components_param)  # copy taken before the call
    updated_components = apply_default_model_components(
        model_components=model_components_param,
        time_properties=time_properties)
    assert snapshot == model_components_param  # input was not mutated
    assert updated_components.seasonality == model_components_param.seasonality
    assert updated_components.events == {"daily_event_df_dict": [None]}
    # Combination of defaults and provided params
    assert updated_components.changepoints == {
        "changepoints_dict": {
            "method": "uniform",
            "n_changepoints": 20,
        },
        "seasonality_changepoints_dict": [None],
    }
    assert updated_components.autoregression == {"autoreg_dict": [None]}
    assert updated_components.uncertainty == model_components_param.uncertainty
    # Combination of defaults and provided params
    assert updated_components.custom == {
        "silverkite": silverkite,  # the very object that was passed in, not a copy
        "silverkite_diagnostics": silverkite_diagnostics,
        "origin_for_time_vars": [time_properties["origin_for_time_vars"]],  # taken from time_properties
        "extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "max_admissible_value": 4,
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
    }

    # 3. Empty `time_properties` (no "origin_for_time_vars" key)
    updated_components = apply_default_model_components(
        model_components=model_components_param,
        time_properties={})
    assert updated_components.custom["origin_for_time_vars"] == [None]

    # 4. Custom autoregression
    custom_autoreg_dict = {
        "lag_dict": {"orders": [7]},
        "agg_lag_dict": {
            "orders_list": [[7, 7*2, 7*3]],
            "interval_list": [(7, 7*2)]},
        "series_na_fill_func": lambda s: s.bfill().ffill()}
    updated_components = apply_default_model_components(
        model_components=ModelComponentsParam(
            autoregression={"autoreg_dict": custom_autoreg_dict})
    )

    result_autoreg_dict = updated_components.autoregression["autoreg_dict"]
    assert result_autoreg_dict["lag_dict"] == {"orders": [7]}
    assert result_autoreg_dict["agg_lag_dict"]["orders_list"] == [[7, 14, 21]]
    assert result_autoreg_dict["agg_lag_dict"]["interval_list"] == [(7, 14)]
Ejemplo n.º 23
0
    def apply_prophet_model_components_defaults(self,
                                                model_components=None,
                                                time_properties=None):
        """Fills in default values for ``model_components``.

        Called by ``get_hyperparameter_grid`` after ``time_properties`` is defined.
        Because both ``time_properties`` and ``model_components`` are required,
        this does not simply override
        `~greykite.framework.templates.forecast_config_defaults.ForecastConfigDefaults.apply_model_components_defaults`.

        Parameters
        ----------
        model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None, default None
            Configuration of model growth, seasonality, events, etc.
            See the docstring of this class for details.
        time_properties : `dict` [`str`, `any`] or None, default None
            Time properties dictionary (likely produced by
            `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
            with keys:

                ``"period"`` : `int`
                    Period of each observation (i.e. minimum time between observations, in seconds).
                ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
                    ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
                ``"num_training_points"`` : `int`
                    Number of observations for training.
                ``"num_training_days"`` : `int`
                    Number of days for training.
                ``"start_year"`` : `int`
                    Start year of the training period.
                ``"end_year"`` : `int`
                    End year of the forecast period.
                ``"origin_for_time_vars"`` : `float`
                    Continuous time representation of the first date in ``df``.

            Only ``"start_year"`` and ``"end_year"`` are read by this method.
            If None, start_year is set to 2015 and end_year to 2030.

        Returns
        -------
        model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam`
            A copy of the provided ``model_components`` with default values set.
        """
        # Works on a fresh object or a copy, so the input is never mutated.
        model_components = (
            ModelComponentsParam()
            if model_components is None
            else dataclasses.replace(model_components))
        if time_properties is None:
            time_properties = {"start_year": 2015, "end_year": 2030}

        # For every component below, provided params take precedence over the
        # defaults. `allow_unknown_keys=False` requires the provided keys to
        # be a subset of the default keys.

        # seasonality
        model_components.seasonality = update_dictionary(
            default_dict={
                "seasonality_mode": ["additive"],
                "seasonality_prior_scale": [10.0],
                "yearly_seasonality": ["auto"],
                "weekly_seasonality": ["auto"],
                "daily_seasonality": ["auto"],
                "add_seasonality_dict": [None],
            },
            overwrite_dict=model_components.seasonality,
            allow_unknown_keys=False)

        # growth
        model_components.growth = update_dictionary(
            default_dict={"growth_term": ["linear"]},
            overwrite_dict=model_components.growth,
            allow_unknown_keys=False)

        # events
        events = update_dictionary(
            default_dict={
                # see `get_prophet_holidays` for defaults
                "holiday_lookup_countries": "auto",
                "holiday_pre_num_days": [2],
                "holiday_post_num_days": [2],
                "start_year": time_properties["start_year"],
                "end_year": time_properties["end_year"],
                "holidays_prior_scale": [10.0],
            },
            overwrite_dict=model_components.events,
            allow_unknown_keys=False)
        model_components.events = events

        # Creates events dictionary for prophet estimator.
        # The holiday years are extended by 1 year on each end, to ensure
        # coverage of most relevant holidays.
        first_year = events["start_year"] - 1
        last_year = events["end_year"] + 1
        year_list = list(range(first_year, last_year + 1))

        # Only one set of holiday_lookup_countries, holiday_pre_num_days and
        # holiday_post_num_days is currently supported.
        # Warns if the user supplies more than one.
        pre_num_days = events["holiday_pre_num_days"]
        if len(pre_num_days) > 1:
            warnings.warn(
                f"`events['holiday_pre_num_days']` list has more than 1 element. We currently support only 1 element. "
                f"Using {pre_num_days[0]}.")
        post_num_days = events["holiday_post_num_days"]
        if len(post_num_days) > 1:
            warnings.warn(
                f"`events['holiday_post_num_days']` list has more than 1 element. We currently support only 1 element. "
                f"Using {post_num_days[0]}."
            )

        # If events["holiday_lookup_countries"] has multiple options, picks the first option
        countries = events["holiday_lookup_countries"]
        if countries is not None and countries != "auto":
            if len(countries) > 1:
                # There are multiple elements. It is a list of options, rather
                # than a flat list of country names, if any element is a list,
                # None, or "auto".
                has_multiple_options = (
                    any(isinstance(x, list) for x in countries)
                    or None in countries
                    or "auto" in countries)
                if has_multiple_options:
                    warnings.warn(
                        f"`events['holiday_lookup_countries']` contains multiple options. "
                        f"We currently support only 1 option. Using {countries[0]}."
                    )
                    events["holiday_lookup_countries"] = countries[0]
            elif isinstance(countries[0], (list, tuple)):
                # There's only one element, and it's a list of countries
                events["holiday_lookup_countries"] = countries[0]

        # The holiday effect is modeled from "holiday_pre_num_days" days before
        # to "holiday_post_num_days" days after the holiday.
        holidays_df = self.get_prophet_holidays(
            year_list=year_list,
            countries=events["holiday_lookup_countries"],
            # Prophet expects a negative value for `lower_window`
            lower_window=-events["holiday_pre_num_days"][0],
            upper_window=events["holiday_post_num_days"][0])
        model_components.events = {
            "holidays_df": holidays_df,
            "holidays_prior_scale": events["holidays_prior_scale"],
        }

        # changepoints_dict
        model_components.changepoints = update_dictionary(
            default_dict={
                "changepoint_prior_scale": [0.05],
                "changepoints": [None],
                "n_changepoints": [25],
                "changepoint_range": [0.8],
            },
            overwrite_dict=model_components.changepoints,
            allow_unknown_keys=False)

        # uncertainty
        model_components.uncertainty = update_dictionary(
            default_dict={
                "mcmc_samples": [0],
                "uncertainty_samples": [1000],
            },
            overwrite_dict=model_components.uncertainty,
            allow_unknown_keys=False)

        # regressors
        model_components.regressors = update_dictionary(
            default_dict={"add_regressor_dict": [None]},
            overwrite_dict=model_components.regressors,
            allow_unknown_keys=False)

        # there are no custom parameters for Prophet

        # sets to {} if None, for each item if
        # `model_components.hyperparameter_override` is a list of dictionaries
        model_components.hyperparameter_override = update_dictionaries(
            {}, overwrite_dicts=model_components.hyperparameter_override)

        return model_components
Ejemplo n.º 24
0
def test_prophet_hyperparameter_grid_warn():
    """Tests get_prophet_hyperparameter_grid warnings.

    Checks that a warning is issued when more than one option is supplied for
    ``holiday_pre_num_days``, ``holiday_post_num_days`` or
    ``holiday_lookup_countries``, and that no ``holiday_lookup_countries``
    warning is issued when a single list of countries is provided
    (nested or flat).
    """
    # Imported locally so the test does not depend on a module-level import.
    import warnings

    def get_grid(events_param):
        """Returns the hyperparameter grid of a fresh template using ``events_param``."""
        template = ProphetTemplate()
        template.config = template.apply_forecast_config_defaults()
        template.config.model_components_param = ModelComponentsParam(
            events=events_param)
        return template.get_hyperparameter_grid()

    # holiday params
    start_year = 2018
    end_year = 2022
    holiday_pre_num_days = [1, 2, 3, 4]
    holiday_post_num_days = [1, 2, 3, 4]
    holiday_lookup_countries = [["UnitedStates"],
                                ["UnitedStates", "China", "India"]]
    holidays_prior_scale = [5.0, 10.0, 15.0]
    events = {
        "holiday_lookup_countries": holiday_lookup_countries,
        "holiday_pre_num_days": holiday_pre_num_days,
        "holiday_post_num_days": holiday_post_num_days,
        "start_year": start_year,
        "end_year": end_year,
        "holidays_prior_scale": holidays_prior_scale
    }
    with pytest.warns(Warning) as record:
        get_grid(events)
    # The recorder stays readable after the `with` block exits; asserting here
    # lets `pytest.warns` itself report the case where nothing was raised.
    assert "`events['holiday_pre_num_days']` list has more than 1 element. "\
           "We currently support only 1 element. "\
           "Using 1." in record[0].message.args[0]
    assert "`events['holiday_post_num_days']` list has more than 1 element. " \
           "We currently support only 1 element. " \
           "Using 1." in record[1].message.args[0]
    assert "`events['holiday_lookup_countries']` contains multiple options. "\
           "We currently support only 1 option. Using ['UnitedStates']." in record[2].message.args[0]

    # other invalid holiday_lookup_countries
    events["holiday_pre_num_days"] = [1]
    events["holiday_post_num_days"] = [0]
    events["holiday_lookup_countries"] = ["auto", None]
    with pytest.warns(Warning) as record:
        get_grid(events)
    assert "`events['holiday_lookup_countries']` contains multiple options. " \
           "We currently support only 1 option. Using auto." in record[0].message.args[0]

    # no warning if only one list of holiday_lookup_countries is provided
    # `pytest.warns(None)` is deprecated since pytest 7 and rejected by
    # pytest 8, so warnings are recorded with the standard library and
    # checked explicitly.
    events["holiday_pre_num_days"] = [1]
    events["holiday_post_num_days"] = [0]
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        events["holiday_lookup_countries"] = [[
            "UnitedStates", "China", "UnitedKingdom", "India"
        ]]
        hyp1 = get_grid(events)

        events["holiday_lookup_countries"] = [
            "UnitedStates", "China", "UnitedKingdom", "India"
        ]
        hyp2 = get_grid(events)
    # Only warnings about the country lookup are checked, so unrelated
    # third-party warnings cannot fail the test.
    country_warnings = [
        str(w.message) for w in caught
        if "holiday_lookup_countries" in str(w.message)
    ]
    assert not country_warnings, country_warnings
    assert_equal(hyp1, hyp2)
Ejemplo n.º 25
0
def test_prophet_hyperparameter_grid_custom_seasonality(default_holidays):
    """Tests get_prophet_hyperparameter_grid for custom seasonality params, other params being defaults"""
    def make_add_seasonality_options():
        """Returns a new list holding the four custom seasonality dictionaries.

        New objects are built on every call, so the input given to the
        template and the expected grid never share state.
        """
        # (yearly order, yearly prior, quarterly order, weekly order, weekly prior)
        specs = [
            (20, 20.0, 15, 35, 30.0),
            (10, 20.0, 3, 5, 20.0),
            (10, 30.0, 5, 15, 20.0),
            (15, 20.0, 10, 25, 20.0),
        ]
        options = []
        for yearly_order, yearly_prior, quarterly_order, weekly_order, weekly_prior in specs:
            options.append({
                "yearly": {
                    "period": 365.25,
                    "fourier_order": yearly_order,
                    "prior_scale": yearly_prior,
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": quarterly_order,
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": weekly_order,
                    "prior_scale": weekly_prior,
                },
            })
        return options

    # Custom seasonality
    seasonality = {
        "seasonality_mode": ["additive", "multiplicative"],
        "seasonality_prior_scale": [5.0, 10.0, 15.0],
        "yearly_seasonality": [True, False],
        "weekly_seasonality": [True, False],
        "daily_seasonality": [True, False],
        "add_seasonality_dict": make_add_seasonality_options(),
    }
    prophet_template = ProphetTemplate()
    prophet_template.config = prophet_template.apply_forecast_config_defaults()
    prophet_template.config.model_components_param = ModelComponentsParam(
        seasonality=seasonality)
    actual_grid = prophet_template.get_hyperparameter_grid()

    # Expected values: the seasonality lists are carried over as provided,
    # the remaining keys hold the values listed here.
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["additive", "multiplicative"],
        "estimator__seasonality_prior_scale": [5.0, 10.0, 15.0],
        "estimator__yearly_seasonality": [True, False],
        "estimator__weekly_seasonality": [True, False],
        "estimator__daily_seasonality": [True, False],
        "estimator__add_seasonality_dict": make_add_seasonality_options(),
        "estimator__holidays": [default_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [None],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [None],
    }
    # Assertions
    assert_equal(actual=actual_grid, expected=expected_grid)
Ejemplo n.º 26
0
# Describes the input data
metadata = MetadataParam(
    time_col="ts",  # name of the time column
    value_col="y",  # name of the value column
    freq="D",  # "H" for hourly, "D" for daily, "W" for weekly, etc.
)

# Specifies model parameters
model_components = ModelComponentsParam(
    changepoints=dict(
        changepoints_dict=dict(
            method="auto",
            potential_changepoint_n=25,
            regularization_strength=0.5,
            resample_freq="7D",
            no_changepoint_distance_from_end="365D",
        ),
    ),
    uncertainty=dict(uncertainty_dict="auto"),
    custom=dict(
        fit_algorithm_dict=dict(fit_algorithm="linear"),
    ),
)

# Runs the forecast
forecaster = Forecaster()
result = forecaster.run_forecast_config(
    df=df,
    config=ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
Ejemplo n.º 27
0
def test_prophet_hyperparameter_grid_auto_list(default_holidays):
    """Tests `get_prophet_hyperparameter_grid` automatic list conversion
    via `dictionaries_values_to_lists`.

    All parameters are passed as bare values; the expected grid wraps each
    of them in a single-element list. Holidays are tested separately
    because they are not directly passed to ProphetEstimator.
    """
    # Bare (non-list) parameter values
    custom_seasonalities = {
        "yearly": {
            "period": 365.25,
            "fourier_order": 20,
            "prior_scale": 20.0,
        },
        "quarterly": {
            "period": 365.25 / 4,
            "fourier_order": 15,
        },
        "weekly": {
            "period": 7,
            "fourier_order": 35,
            "prior_scale": 30.0,
        },
    }
    seasonality = dict(
        seasonality_mode="multiplicative",
        seasonality_prior_scale=10.0,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=True,
        add_seasonality_dict=custom_seasonalities)
    components = ModelComponentsParam(
        growth=dict(growth_term="linear"),
        seasonality=seasonality,
        changepoints=dict(
            changepoint_prior_scale=0.05,
            changepoints=["2018-10-11", "2018-11-11", "2019-01-17"],
            n_changepoints=25,
            changepoint_range=0.8),
        regressors=dict(
            add_regressor_dict={
                "reg_col1": {
                    "prior_scale": 10.0,
                    "standardize": False,
                    "mode": "additive",
                },
                "reg_col2": {
                    "prior_scale": 20.0,
                    "standardize": True,
                    "mode": "multiplicative",
                },
            }),
        uncertainty=dict(mcmc_samples=0, uncertainty_samples=1000))

    prophet_template = ProphetTemplate()
    prophet_template.config = prophet_template.apply_forecast_config_defaults()
    prophet_template.config.model_components_param = components
    actual_grid = prophet_template.get_hyperparameter_grid()

    # Expected values: each bare value wrapped in a list
    expected_regressor_dict = {
        "reg_col1": {
            "prior_scale": 10.0,
            "standardize": False,
            "mode": "additive",
        },
        "reg_col2": {
            "prior_scale": 20.0,
            "standardize": True,
            "mode": "multiplicative",
        },
    }
    expected_grid = {
        "estimator__growth": ["linear"],
        "estimator__seasonality_mode": ["multiplicative"],
        "estimator__seasonality_prior_scale": [10.0],
        "estimator__yearly_seasonality": [False],
        "estimator__weekly_seasonality": [False],
        "estimator__daily_seasonality": [True],
        "estimator__add_seasonality_dict": [seasonality["add_seasonality_dict"]],
        "estimator__holidays": [default_holidays],
        "estimator__holidays_prior_scale": [10.0],
        "estimator__changepoint_prior_scale": [0.05],
        "estimator__changepoints": [
            ["2018-10-11", "2018-11-11", "2019-01-17"],
        ],
        "estimator__n_changepoints": [25],
        "estimator__changepoint_range": [0.8],
        "estimator__mcmc_samples": [0],
        "estimator__uncertainty_samples": [1000],
        "estimator__add_regressor_dict": [expected_regressor_dict],
    }
    assert_equal(actual=actual_grid, expected=expected_grid)
Ejemplo n.º 28
0
def test_run_template_5():
    """Runs custom template with monthly data, auto-regression and lagged regressors.

    Builds a forecast config for the ``SK`` model template on generated data
    (``freq="MS"``, 48 periods) with:

        - ``regressor1`` as the only regular regressor (``extra_pred_cols``),
        - an autoregressive lag of order 1 on the response,
        - lagged regressors: ``regressor2`` with ``"auto"`` settings and
          ``regressor_categ`` with an explicit lag of order 5.

    Checks the backtest RMSE, the generic pipeline result, and that the expected
    lag terms are present in the fitted model's ``pred_cols`` and ``x_mat`` columns.
    """
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    # All regressor columns stay in the input df, but only `reg_cols` are used
    # as regular regressors; the others are referenced as lagged regressors below.
    reg_cols_all = ["regressor1", "regressor2", "regressor_categ"]
    reg_cols = ["regressor1"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols_all
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=reg_cols),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        lagged_regressors={
            "lagged_regressor_dict": [
                {"regressor2": "auto"},
                {"regressor_categ": {"lag_dict": {"orders": [5]}}}
            ]},
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    # Warnings raised while fitting are not under test here
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.46, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
        # Checks lagged regressor columns
        actual_pred_cols = set(result.model[-1].model_dict["pred_cols"])
        actual_x_mat_cols = set(result.model[-1].model_dict["x_mat"].columns)
        expected_pred_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5'
        }
        # NOTE(review): only one dummy level of the lagged categorical regressor
        # is checked -- confirm whether other levels should be asserted as well.
        expected_x_mat_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5[T.c2]'
        }
        # Subset checks: the model may contain additional terms beyond these
        assert expected_pred_cols.issubset(actual_pred_cols)
        assert expected_x_mat_cols.issubset(actual_x_mat_cols)
Ejemplo n.º 29
0
def test_prophet_template_custom():
    """Checks the pipeline parameters ``ProphetTemplate`` builds from a custom config.

    Every section of ``ForecastConfig`` is populated with explicit values on
    long-range hourly input data, and the parameters returned by
    ``apply_template_for_pipeline_params`` are compared against those values.
    """
    # Input data, with the time and value columns renamed in place
    time_col = "some_time_col"
    value_col = "some_value_col"
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300 * 24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    df.rename(
        columns={cst.TIME_COL: time_col, cst.VALUE_COL: value_col},
        inplace=True)

    # Metadata, including an anomaly adjustment that adds 10.0 to every record
    metric = EvaluationMetricEnum.MeanAbsoluteError
    delta = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [delta],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = dict(
        value_col=cst.VALUE_COL,
        anomaly_df=anomaly_df,
        start_date_col=cst.START_DATE_COL,
        end_date_col=cst.END_DATE_COL,
        adjustment_delta_col=cst.ADJUSTMENT_DELTA_COL,
        filter_by_dict={cst.METRIC_COL: cst.VALUE_COL},
        adjustment_method="add")
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )

    # Evaluation settings
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params=dict(strategy="quantile", constant=None, quantile=0.8),
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7)

    # Model components, one named dictionary per component
    seasonality = {
        "yearly_seasonality": [True],
        "weekly_seasonality": [False],
        "daily_seasonality": [4],
        "add_seasonality_dict": [{
            "yearly": {"period": 365.25, "fourier_order": 20, "prior_scale": 20.0},
            "quarterly": {"period": 365.25 / 4, "fourier_order": 15},
            "weekly": {"period": 7, "fourier_order": 35, "prior_scale": 30.0},
        }],
    }
    events = {
        "holiday_lookup_countries": ["UnitedStates", "UnitedKingdom", "India"],
        "holiday_pre_num_days": [2],
        "holiday_post_num_days": [3],
        "holidays_prior_scale": [5.0],
    }
    regressors = {
        "add_regressor_dict": [{
            "regressor1": {"prior_scale": 10.0, "mode": 'additive'},
            "regressor2": {"prior_scale": 20.0, "mode": 'multiplicative'},
        }],
    }
    changepoints = {
        "changepoint_prior_scale": [0.05],
        "changepoints": [None],
        "n_changepoints": [50],
        "changepoint_range": [0.9],
    }
    uncertainty = {
        "mcmc_samples": [500],
        "uncertainty_samples": [2000],
    }
    hyperparameter_override = {
        "input__response__null__impute_algorithm": "ts_interpolate",
        "input__response__null__impute_params": {"orders": [7, 14]},
        "input__regressors_numeric__normalize__normalize_algorithm": "RobustScaler",
    }
    model_components = ModelComponentsParam(
        seasonality=seasonality,
        growth={"growth_term": "linear"},
        events=events,
        regressors=regressors,
        changepoints=changepoints,
        uncertainty=uncertainty,
        hyperparameter_override=hyperparameter_override)

    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        model_components_param=model_components,
        computation_param=computation)

    # Applies the template; the pipeline is compared separately from the rest
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    pipeline = params.pop("pipeline", None)

    # Adding start_year and end_year based on the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year

    expected_params = {
        "df": df,
        "time_col": time_col,
        "value_col": value_col,
        "date_format": metadata.date_format,
        "freq": metadata.freq,
        "train_end_date": metadata.train_end_date,
        "anomaly_info": metadata.anomaly_info,
        # model
        "regressor_cols": template.regressor_cols,
        "estimator": None,
        "hyperparameter_grid": template.hyperparameter_grid,
        "hyperparameter_budget": computation.hyperparameter_budget,
        "n_jobs": computation.n_jobs,
        "verbose": computation.verbose,
        # forecast
        "forecast_horizon": forecast_horizon,
        "coverage": coverage,
        "test_horizon": evaluation_period.test_horizon,
        "periods_between_train_test": evaluation_period.periods_between_train_test,
        "agg_periods": evaluation_metric.agg_periods,
        "agg_func": evaluation_metric.agg_func,
        # evaluation
        "score_func": metric.name,
        "score_func_greater_is_better": metric.get_metric_greater_is_better(),
        "cv_report_metrics": evaluation_metric.cv_report_metrics,
        "null_model_params": evaluation_metric.null_model_params,
        "relative_error_tolerance": evaluation_metric.relative_error_tolerance,
        # CV
        "cv_horizon": evaluation_period.cv_horizon,
        "cv_min_train_periods": evaluation_period.cv_min_train_periods,
        "cv_expanding_window": evaluation_period.cv_expanding_window,
        "cv_periods_between_splits": evaluation_period.cv_periods_between_splits,
        "cv_periods_between_train_test": evaluation_period.cv_periods_between_train_test,
        "cv_max_splits": evaluation_period.cv_max_splits,
    }
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Ejemplo n.º 30
0
def test_run_template_2():
    """Runs the ``SK`` model template with every model component customized.

    Uses 400 daily periods of generated data with three regressors, custom
    seasonality, holiday events, automatic changepoints and ridge fitting,
    then checks backtest/train metrics and the generic pipeline result.
    """
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"][[TIME_COL, VALUE_COL] + reg_cols]
    forecast_horizon = data["test_df"].shape[0]

    # Holiday events, and the model terms derived from them
    daily_event_df_dict = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_pred_cols = get_event_pred_cols(daily_event_df_dict)

    # Model components, one named piece at a time
    fs_components_df = pd.DataFrame({
        "name": ["tow", "tom", "toq", "toy"],
        "period": [7.0, 1.0, 1.0, 1.0],
        "order": [2, 1, 1, 5],
        "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
    })
    changepoints = dict(
        changepoints_dict=dict(
            method="auto",
            yearly_seasonality_order=3,
            regularization_strength=0.5,
            resample_freq="14D",
            potential_changepoint_distance="56D",
            no_changepoint_proportion_from_end=0.2),
        seasonality_changepoints_dict=dict(
            potential_changepoint_distance="60D",
            regularization_strength=0.5,
            no_changepoint_proportion_from_end=0.2))
    values = df[VALUE_COL]
    custom = dict(
        origin_for_time_vars=None,
        # growth, regressors, events
        extra_pred_cols=[["ct1"] + reg_cols + event_pred_cols],
        fit_algorithm_dict=dict(
            fit_algorithm="ridge",
            fit_algorithm_params=dict(cv=2)),
        min_admissible_value=min(values) - abs(max(values)),
        max_admissible_value=max(values) * 2)
    model_components = ModelComponentsParam(
        seasonality=dict(fs_components_df=fs_components_df),
        events=dict(daily_event_df_dict=daily_event_df_dict),
        changepoints=changepoints,
        autoregression=None,
        uncertainty=dict(uncertainty_dict=None),
        custom=custom)
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        # Expected metric values, checked to 1% relative tolerance
        expected_backtest = [
            (rmse, 2.692),
            (q80, 1.531),
            (PREDICTION_BAND_COVERAGE, 0.823),
        ]
        expected_train = [
            (rmse, 2.304),
            (q80, 0.921),
            (PREDICTION_BAND_COVERAGE, 0.897),
        ]
        for metric_name, expected_value in expected_backtest:
            assert result.backtest.test_evaluation[metric_name] == pytest.approx(expected_value, rel=1e-2)
        for metric_name, expected_value in expected_train:
            assert result.forecast.train_evaluation[metric_name] == pytest.approx(expected_value, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)