def test_benchmark_silverkite_template_with_real_data():
    # setting every list to 1 item to speed up test case
    forecast_horizons = [30]
    max_cvs = [3]
    fit_algorithms = ["linear"]
    metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name)

    # real data
    dl = DataLoader()
    data_path = dl.get_data_home(data_sub_dir="daily")
    data_name = "daily_female_births"
    df = dl.get_df(data_path=data_path, data_name=data_name)
    time_col = "Date"
    value_col = "Births"
    metadata = MetadataParam(time_col=time_col, value_col=value_col, freq="D")
    result_silverkite_real = benchmark_silverkite_template(
        data_name=data_name,
        df=df,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=forecast_horizons,
        fit_algorithms=fit_algorithms,
        max_cvs=max_cvs)

    result_silverkite_real = result_silverkite_real[0]
    assert result_silverkite_real["data_name"] == data_name
    assert result_silverkite_real["forecast_model_name"] == "silverkite_linear"
    assert result_silverkite_real["train_period"] == df.shape[0]
    assert result_silverkite_real["forecast_horizon"] == 30
    assert result_silverkite_real["cv_folds"] == 3


def test_benchmark_silverkite_template_with_simulated_data():
    # setting every list to 1 item to speed up test case
    forecast_horizons = [30]
    max_cvs = [3]
    fit_algorithms = ["linear"]
    metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name)

    # Simulated data
    data_name = "daily_simulated"
    train_period = 365
    data = generate_df_for_tests(freq="D", periods=train_period)
    df = data["df"]
    time_col, value_col = df.columns
    metadata = MetadataParam(time_col=time_col, value_col=value_col, freq="D")
    result_silverkite_simulated = benchmark_silverkite_template(
        data_name=data_name,
        df=df,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=forecast_horizons,
        fit_algorithms=fit_algorithms,
        max_cvs=max_cvs)

    result_silverkite_simulated = result_silverkite_simulated[0]
    assert result_silverkite_simulated["data_name"] == data_name
    assert result_silverkite_simulated["forecast_model_name"] == "silverkite_linear"
    assert result_silverkite_simulated["train_period"] == train_period
    assert result_silverkite_simulated["forecast_horizon"] == 30
    assert result_silverkite_simulated["cv_folds"] == 3


def test_apply_template_for_pipeline_params(df):
    mt = MyTemplate()
    config = ForecastConfig(
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
        ),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric="MeanSquaredError"))
    original_config = dataclasses.replace(config)

    # Tests apply_template_for_pipeline_params
    pipeline_params = mt.apply_template_for_pipeline_params(df=df,
                                                            config=config)

    assert_equal(pipeline_params["df"], df)
    assert pipeline_params["train_end_date"] is None
    estimator = pipeline_params["pipeline"].steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    assert (
        pipeline_params["pipeline"].named_steps["input"].transformer_list[2][1]
        .named_steps["select_reg"].column_names == mt.get_regressor_cols())

    # Tests `apply_template_decorator`
    assert mt.config == mt.apply_forecast_config_defaults(config)
    assert mt.config != config  # `mt.config` has default values added
    assert config == original_config  # `config` is not modified by the function


def test_get_pipeline(df):
    mt = MyTemplate()
    # Initializes attributes needed by the function
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    metric = EvaluationMetricEnum.MeanSquaredError
    mt.score_func = metric.name
    mt.score_func_greater_is_better = metric.get_metric_greater_is_better()
    mt.config = ForecastConfig(
        coverage=0.9,
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=metric.name
        )
    )
    # Checks get_pipeline output
    pipeline = mt.get_pipeline()
    assert isinstance(pipeline, sklearn.pipeline.Pipeline)
    estimator = pipeline.steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    expected_col_names = ["regressor1", "regressor2", "regressor_categ", "regressor_bool"]
    assert pipeline.named_steps["input"].transformer_list[2][1].named_steps["select_reg"].column_names == expected_col_names
    assert_eval_function_equal(pipeline.steps[-1][-1].score_func,
                               metric.get_metric_func())
Example #5
    def apply_evaluation_metric_defaults(
        evaluation: Optional[EvaluationMetricParam] = None
    ) -> EvaluationMetricParam:
        """Applies the default EvaluationMetricParam values to the given object.
        If an expected attribute value is provided, the value is unchanged. Otherwise the default value for it is used.
        Other attributes are untouched.
        If the input object is None, it creates a EvaluationMetricParam object.

        Parameters
        ----------
        evaluation : `~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam` or None
            The EvaluationMetricParam object.

        Returns
        -------
        evaluation : `~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam`
            Valid EvaluationMetricParam object with the provided attribute values, and default values for any attributes that were not provided.
        """
        if evaluation is None:
            evaluation = EvaluationMetricParam()
        if evaluation.cv_selection_metric is None:
            # NB: subclass may want to override, if designed for a different objective (e.g. quantile loss)
            evaluation.cv_selection_metric = EvaluationMetricEnum.MeanAbsolutePercentError.name
        if evaluation.cv_report_metrics is None:
            evaluation.cv_report_metrics = CV_REPORT_METRICS_ALL
        return evaluation
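
A minimal usage sketch of the defaults logic above. This is hedged: `apply_evaluation_metric_defaults` is assumed to be callable in the current scope (the snippet shows it indented as a method, so adapt the call to your context), and the import paths follow recent greykite releases.

from greykite.common.evaluation import EvaluationMetricEnum
from greykite.framework.templates.autogen.forecast_config import EvaluationMetricParam

# No object provided: a new EvaluationMetricParam is created with all defaults.
defaults = apply_evaluation_metric_defaults(None)
assert defaults.cv_selection_metric == EvaluationMetricEnum.MeanAbsolutePercentError.name

# Provided attributes are kept; only the missing ones are filled in.
custom = apply_evaluation_metric_defaults(
    EvaluationMetricParam(cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name))
assert custom.cv_selection_metric == EvaluationMetricEnum.MeanSquaredError.name
assert custom.cv_report_metrics is not None  # filled with the CV_REPORT_METRICS_ALL default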
Example #6
def test_forecast_config():
    """Tests ForecastConfig dataclass"""
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=MetadataParam(time_col="custom_time_col",
                                     anomaly_info=[{
                                         "key": "value"
                                     }, {
                                         "key2": "value2"
                                     }]),
        evaluation_period_param=EvaluationPeriodParam(
            test_horizon=10,
            periods_between_train_test=5,
            cv_min_train_periods=20),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name,
            cv_report_metrics=[
                EvaluationMetricEnum.MeanAbsoluteError.name,
                EvaluationMetricEnum.MeanAbsolutePercentError.name
            ],
            relative_error_tolerance=0.02),
        model_components_param=ModelComponentsParam(
            autoregression={"autoreg_dict": {
                "autoreg_param": 0
            }},
            changepoints=None,
            custom={"custom_param": 1},
            growth={"growth_param": 2},
            events={"events_param": 3},
            hyperparameter_override=[{
                "h1": 4
            }, {
                "h2": 5
            }, None],
            regressors={"names": ["regressor1", "regressor2"]},
            lagged_regressors={"lagged_regressor_dict": {
                "lag_reg_param": 0
            }},
            seasonality={"seas_param": 6},
            uncertainty={"uncertainty_param": 7}),
        computation_param=ComputationParam(n_jobs=None))
    assert_forecast_config(config)

    # Tests a string passed to `cv_report_metrics`
    assert ForecastConfig(evaluation_metric_param=EvaluationMetricParam(
        cv_report_metrics=CV_REPORT_METRICS_ALL), ).to_dict()
Example #7
def df_config():
    data = generate_df_with_reg_for_tests(freq="W-MON",
                                          periods=140,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    model_template = "SILVERKITE"
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=3)
    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90
    config = ForecastConfig(model_template=model_template,
                            computation_param=computation,
                            coverage=coverage,
                            evaluation_metric_param=evaluation_metric,
                            evaluation_period_param=evaluation_period,
                            forecast_horizon=forecast_horizon,
                            model_components_param=model_components)
    return {
        "df": df,
        "config": config,
        "model_template": model_template,
        "reg_cols": reg_cols,
    }
Example #8
def test_prophet_template_custom():
    """Tests prophet_template with custom values, with long range input"""
    # prepares input data
    data = generate_df_with_reg_for_tests(freq="H",
                                          periods=300 * 24,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename(
        {cst.TIME_COL: time_col, cst.VALUE_COL: value_col},
        axis=1,
        inplace=True)
    # prepares params and calls template
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [adjustment_size],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": cst.VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": cst.START_DATE_COL,
        "end_date_col": cst.END_DATE_COL,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {
            cst.METRIC_COL: cst.VALUE_COL
        },
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[
            EvaluationMetricEnum.MedianAbsolutePercentError.name
        ],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(test_horizon=1,
                                              periods_between_train_test=2,
                                              cv_horizon=3,
                                              cv_min_train_periods=4,
                                              cv_expanding_window=True,
                                              cv_periods_between_splits=5,
                                              cv_periods_between_train_test=6,
                                              cv_max_splits=7)
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
            "daily_seasonality": [4],
            "add_seasonality_dict": [{
                "yearly": {
                    "period": 365.25,
                    "fourier_order": 20,
                    "prior_scale": 20.0
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": 15
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": 35,
                    "prior_scale": 30.0
                }
            }]
        },
        growth={"growth_term": "linear"},
        events={
            "holiday_lookup_countries":
            ["UnitedStates", "UnitedKingdom", "India"],
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [3],
            "holidays_prior_scale": [5.0]
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10.0,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 20.0,
                    "mode": 'multiplicative'
                },
            }]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [50],
            "changepoint_range": [0.9]
        },
        uncertainty={
            "mcmc_samples": [500],
            "uncertainty_samples": [2000]
        },
        hyperparameter_override={
            "input__response__null__impute_algorithm":
            "ts_interpolate",
            "input__response__null__impute_params": {
                "orders": [7, 14]
            },
            "input__regressors_numeric__normalize__normalize_algorithm":
            "RobustScaler",
        })
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(model_template=ModelTemplateEnum.PROPHET.name,
                            metadata_param=metadata,
                            forecast_horizon=forecast_horizon,
                            coverage=coverage,
                            evaluation_metric_param=evaluation_metric,
                            evaluation_period_param=evaluation_period,
                            model_components_param=model_components,
                            computation_param=computation)
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    pipeline = params.pop("pipeline", None)
    # Adding start_year and end_year based on the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Example #9
def test_silverkite_template_custom(model_components_param):
    """"Tests simple_silverkite_template with custom parameters,
    and data that has regressors"""
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300*24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        TIME_COL: time_col,
        VALUE_COL: value_col
    }, axis=1, inplace=True)

    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7
    )
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1
    )
    forecast_horizon = 20
    coverage = 0.7
    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation
        )
    )
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)


def valid_configs():
    metadata = MetadataParam(time_col=TIME_COL, value_col=VALUE_COL, freq="D")
    computation = ComputationParam(hyperparameter_budget=10,
                                   n_jobs=None,
                                   verbose=1)
    forecast_horizon = 2 * 7
    coverage = 0.90
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        cv_report_metrics=None,
        agg_periods=7,
        agg_func=np.mean,
        null_model_params=None)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=2 * 7,
        periods_between_train_test=2 * 7,
        cv_horizon=1 * 7,
        cv_min_train_periods=8 * 7,
        cv_expanding_window=True,
        cv_periods_between_splits=7,
        cv_periods_between_train_test=3 * 7,
        cv_max_splits=2)

    silverkite_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": False,
            "weekly_seasonality": True
        },
        growth={"growth_term": "quadratic"},
        events={
            "holidays_to_model_separately":
            SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
            "holiday_lookup_countries": ["UnitedStates"],
            "holiday_pre_num_days": 3,
        },
        changepoints={
            "changepoints_dict": {
                "method": "uniform",
                "n_changepoints": 20,
            }
        },
        regressors={
            "regressor_cols": ["regressor1", "regressor2", "regressor3"]
        },
        uncertainty={
            "uncertainty_dict": "auto",
        },
        hyperparameter_override={"input__response__null__max_frac": 0.1},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "normalize": True
                },
            },
            "feature_sets_enabled": False
        })

    prophet_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": 'additive'
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})

    valid_prophet = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=prophet_components)

    valid_silverkite = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=silverkite_components)

    configs = {
        "valid_prophet": valid_prophet,
        "valid_silverkite": valid_silverkite
    }
    return configs
Example #11
def test_run_forecast_config_with_single_simple_silverkite_template():
    # The generic names of single simple silverkite templates are not added to `ModelTemplateEnum`,
    # so we test here that they are still recognized.
    data = generate_df_for_tests(freq="D", periods=365)
    df = data["df"]
    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=2)

    model_components = ModelComponentsParam(
        hyperparameter_override=[{
            "estimator__yearly_seasonality": 1
        }, {
            "estimator__yearly_seasonality": 2
        }])
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    single_template_class = SimpleSilverkiteTemplateOptions(
        freq=SILVERKITE_COMPONENT_KEYWORDS.FREQ.value.DAILY,
        seas=SILVERKITE_COMPONENT_KEYWORDS.SEAS.value.NONE)

    forecast_config = ForecastConfig(
        model_template=[single_template_class, "DAILY_ALGO_SGD", "SILVERKITE_DAILY_90"],
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)

    forecaster = Forecaster()
    result = forecaster.run_forecast_config(df=df, config=forecast_config)

    summary = summarize_grid_search_results(result.grid_search)
    # `single_template_class` is 1 template, "DAILY_ALGO_SGD" is 1 template, and "SILVERKITE_DAILY_90" contains 4 templates.
    # With 2 items in `hyperparameter_override`, there should be (1 + 1 + 4) * 2 = 12 cases in total.
    assert summary.shape[0] == 12

    # Tests functionality for single template class only.
    forecast_config = ForecastConfig(model_template=single_template_class,
                                     computation_param=computation,
                                     coverage=coverage,
                                     evaluation_metric_param=evaluation_metric,
                                     evaluation_period_param=evaluation_period,
                                     forecast_horizon=forecast_horizon)

    forecaster = Forecaster()
    pipeline_parameters = forecaster.apply_forecast_config(
        df=df, config=forecast_config)
    assert_equal(actual=pipeline_parameters["hyperparameter_grid"],
                 expected={
                     "estimator__time_properties": [None],
                     "estimator__origin_for_time_vars": [None],
                     "estimator__train_test_thresh": [None],
                     "estimator__training_fraction": [None],
                     "estimator__fit_algorithm_dict": [{
                         "fit_algorithm":
                         "linear",
                         "fit_algorithm_params":
                         None
                     }],
                     "estimator__holidays_to_model_separately": [[]],
                     "estimator__holiday_lookup_countries": [[]],
                     "estimator__holiday_pre_num_days": [0],
                     "estimator__holiday_post_num_days": [0],
                     "estimator__holiday_pre_post_num_dict": [None],
                     "estimator__daily_event_df_dict": [None],
                     "estimator__changepoints_dict": [None],
                     "estimator__seasonality_changepoints_dict": [None],
                     "estimator__yearly_seasonality": [0],
                     "estimator__quarterly_seasonality": [0],
                     "estimator__monthly_seasonality": [0],
                     "estimator__weekly_seasonality": [0],
                     "estimator__daily_seasonality": [0],
                     "estimator__max_daily_seas_interaction_order": [0],
                     "estimator__max_weekly_seas_interaction_order": [2],
                     "estimator__autoreg_dict": [None],
                     "estimator__min_admissible_value": [None],
                     "estimator__max_admissible_value": [None],
                     "estimator__uncertainty_dict": [None],
                     "estimator__growth_term": ["linear"],
                     "estimator__regressor_cols": [[]],
                     "estimator__feature_sets_enabled": [False],
                     "estimator__extra_pred_cols": [[]]
                 },
                 ignore_keys={"estimator__time_properties": None})
Example #12
def test_run_forecast_config_custom():
    """Tests `run_forecast_config` on weekly data with custom config:

     - numeric and categorical regressors
     - coverage
     - null model
    """
    data = generate_df_with_reg_for_tests(freq="W-MON",
                                          periods=140,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=3)

    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    forecast_config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(df=df, config=forecast_config)

        mse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[mse] == pytest.approx(2.976,
                                                                     rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.360,
                                                                     rel=1e-2)
        assert result.forecast.train_evaluation[mse] == pytest.approx(2.224,
                                                                      rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.941,
                                                                      rel=1e-2)
        check_forecast_pipeline_result(result,
                                       coverage=coverage,
                                       strategy=None,
                                       score_func=metric.name,
                                       greater_is_better=False)

    with pytest.raises(KeyError, match="missing_regressor"):
        model_components = ModelComponentsParam(
            regressors={"regressor_cols": ["missing_regressor"]})
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(
            df=df,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                model_components_param=model_components))
        check_forecast_pipeline_result(result,
                                       coverage=None,
                                       strategy=None,
                                       score_func=metric.get_metric_func(),
                                       greater_is_better=False)
Example #13
metadata = MetadataParam(
    time_col="ts",  # name of the time column
    value_col="y",  # name of the value column
    freq="D"  # "H" for hourly, "D" for daily, "W" for weekly, etc.
)

# Defines number of periods to forecast into the future
forecast_horizon = 7

# Specifies intended coverage of the prediction interval
coverage = 0.95

# Defines the metrics to evaluate the forecasts
# We use Mean Absolute Percent Error (MAPE) in this tutorial
evaluation_metric = EvaluationMetricParam(
    cv_selection_metric=EvaluationMetricEnum.MeanAbsolutePercentError.name,
    cv_report_metrics=None)

# Defines the cross-validation config within pipeline
evaluation_period = EvaluationPeriodParam(
    cv_max_splits=1,  # benchmark n_splits is defined in the tscv; no CV splits are needed here to select parameter sets
    periods_between_train_test=0,
)

# Defines parameters related to grid-search computation
computation = ComputationParam(
    hyperparameter_budget=None,
    n_jobs=-1,  # to debug, change to 1 for more informative error messages
    verbose=3)
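
The parameter objects above can be assembled into a ForecastConfig and run through the Forecaster, just as the earlier examples do. Below is a minimal sketch; the tiny synthetic df, the SILVERKITE model template choice, and the import paths are assumptions for illustration, not part of the tutorial snippet above.

import numpy as np
import pandas as pd

from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum

# Hypothetical daily series matching the metadata above (time_col="ts", value_col="y", freq="D").
df = pd.DataFrame({
    "ts": pd.date_range("2020-01-01", periods=200, freq="D"),
    "y": np.arange(200) + np.random.normal(scale=5.0, size=200),
})

# Combines the parameter objects defined above into a single config.
config = ForecastConfig(
    model_template=ModelTemplateEnum.SILVERKITE.name,  # assumed template choice
    forecast_horizon=forecast_horizon,
    coverage=coverage,
    metadata_param=metadata,
    evaluation_metric_param=evaluation_metric,
    evaluation_period_param=evaluation_period,
    computation_param=computation)

# Runs the end-to-end pipeline; the result exposes backtest, forecast, and grid_search attributes.
forecaster = Forecaster()
result = forecaster.run_forecast_config(df=df, config=config)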