Beispiel #1
0
    def __init__(
            self,
            silverkite: SilverkiteForecast = SilverkiteForecast(),
            silverkite_diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics(),
            score_func: callable = mean_squared_error,
            coverage: float = None,
            null_model_params: Optional[Dict] = None,
            uncertainty_dict: Optional[Dict] = None):
        """Store the constructor arguments and reset every fitted attribute.

        ``score_func``, ``coverage`` and ``null_model_params`` are forwarded
        to the parent constructor; the remaining arguments are kept on the
        instance. All attributes describing fitted state start as ``None``.

        NOTE(review): the ``silverkite`` and ``silverkite_diagnostics``
        defaults are evaluated once, when this method is defined, so every
        instance created without these arguments receives the same two
        default objects.
        """
        # The parent constructor handles the scoring-related arguments
        # (per the original note, this initializes the null model).
        super().__init__(
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params)

        # Constructor arguments kept on the instance
        # (`uncertainty_dict` is required in subclasses' __init__).
        self.uncertainty_dict = uncertainty_dict
        self.silverkite: SilverkiteForecast = silverkite
        self.silverkite_diagnostics: SilverkiteDiagnostics = silverkite_diagnostics

        # Fitted state, set by `fit`; `model_dict` is the fitted model in
        # dictionary format returned from the `forecast_silverkite` function.
        self.model_dict = self.pred_cols = self.feature_cols = None
        self.df = self.coef_ = None

        # Predictor category: starts as None and is filled in lazily by the
        # `pred_category` property when needed.
        self._pred_category = None

        # All silverkite estimators should support this.
        self.extra_pred_cols = None

        # `forecast` is set by the predict method,
        # `model_summary` by the summary method.
        self.forecast = self.model_summary = None
Beispiel #2
0
def silverkite_diagnostics():
    """Build and return a new ``SilverkiteDiagnostics`` with default arguments."""
    diagnostics = SilverkiteDiagnostics()
    return diagnostics
Beispiel #3
0
def test_get_silverkite_hyperparameter_grid(model_components_param, silverkite, silverkite_diagnostics):
    """Tests ``SilverkiteTemplate.get_hyperparameter_grid`` in three stages.

    1. With the default forecast config only.
    2. After setting ``model_components_param`` and ``time_properties``.
    3. After adding a list of ``hyperparameter_override`` entries.

    The three arguments are supplied by the caller
    (presumably pytest fixtures -- TODO confirm).
    """
    # Stage 1: grid produced from the default config.
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [SilverkiteForecast()],
        "estimator__silverkite_diagnostics": [SilverkiteDiagnostics()],
        "estimator__origin_for_time_vars": [None],
        "estimator__extra_pred_cols": [["ct1"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None}],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [pd.DataFrame({
            "name": ["tod", "tow", "tom", "toq", "toy"],
            "period": [24.0, 7.0, 1.0, 1.0, 1.0],
            "order": [3, 3, 1, 1, 5],
            "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [None],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [None],
        "estimator__uncertainty_dict": [None],
    }
    # The two estimator-object entries are passed as `ignore_keys`
    # (presumably excluding them from the comparison -- TODO confirm against `assert_equal`);
    # they are checked separately below.
    assert_equal(hyperparameter_grid, expected_grid, ignore_keys={"estimator__silverkite": None, "estimator__silverkite_diagnostics": None})
    # The default grid must not contain the objects passed in as arguments.
    assert hyperparameter_grid["estimator__silverkite"][0] != silverkite
    assert hyperparameter_grid["estimator__silverkite_diagnostics"][0] != silverkite_diagnostics

    # Tests auto-list conversion
    # Stage 2: the same template is reused with user-provided model components
    # and time properties, so statement order matters from here on.
    template.config.model_components_param = model_components_param
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [silverkite],
        "estimator__silverkite_diagnostics": [silverkite_diagnostics],
        "estimator__origin_for_time_vars": [2020],
        "estimator__extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [None],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [{
            "method": "uniform",
            "n_changepoints": 20,
        }],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [4],
        "estimator__uncertainty_dict": [{
            "uncertainty_method": "simple_conditional_residuals"
        }],
    }
    assert_equal(hyperparameter_grid, expected_grid)

    # Tests hyperparameter_override
    # Stage 3: four override entries are given; the expected result below is a
    # list of four grids, one per entry. The `{}` and `None` entries are
    # expected to leave the stage-2 grid unchanged.
    template.config.model_components_param.hyperparameter_override = [
        {
            "input__response__null__max_frac": 0.1,
            "estimator__min_admissible_value": [2],
            "estimator__extra_pred_cols": ["override_estimator__extra_pred_cols"],
        },
        {},
        {
            "estimator__extra_pred_cols": ["val1", "val2"],
            "estimator__origin_for_time_vars": [2019],
        },
        None
    ]
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid["estimator__origin_for_time_vars"] = [2020]
    # Shallow copies are sufficient: only top-level keys are replaced below.
    updated_grid1 = expected_grid.copy()
    # The scalar override 0.1 is expected to appear wrapped in a list, and the
    # flat list of column names is expected to appear wrapped in an outer list.
    updated_grid1["input__response__null__max_frac"] = [0.1]
    updated_grid1["estimator__min_admissible_value"] = [2]
    updated_grid1["estimator__extra_pred_cols"] = [["override_estimator__extra_pred_cols"]]
    updated_grid2 = expected_grid.copy()
    updated_grid2["estimator__extra_pred_cols"] = [["val1", "val2"]]
    updated_grid2["estimator__origin_for_time_vars"] = [2019]
    # `expected_grid` is rebound to the list of per-override grids; the
    # dictionary it referred to is reused for the second and fourth entries.
    expected_grid = [
        updated_grid1,
        expected_grid,
        updated_grid2,
        expected_grid]
    assert_equal(hyperparameter_grid, expected_grid)
Beispiel #4
0
def test_apply_default_model_components(model_components_param, silverkite, silverkite_diagnostics):
    """Tests ``apply_default_model_components`` with and without user input.

    Covers the pure defaults, defaults merged with ``model_components_param``
    and ``time_properties``, an empty ``time_properties`` dictionary, and a
    user-provided ``autoreg_dict``.
    """
    expected_fs_components_df = pd.DataFrame({
        "name": ["tod", "tow", "tom", "toq", "toy"],
        "period": [24.0, 7.0, 1.0, 1.0, 1.0],
        "order": [3, 3, 1, 1, 5],
        "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})
    linear_fit = {
        "fit_algorithm": "linear",
        "fit_algorithm_params": None,
    }

    # Pure defaults: nothing is passed in.
    defaults = apply_default_model_components()
    assert_equal(
        defaults.seasonality,
        {"fs_components_df": [expected_fs_components_df]})
    assert defaults.growth == {}
    assert defaults.events == {"daily_event_df_dict": [None]}
    assert defaults.changepoints == {
        "changepoints_dict": [None],
        "seasonality_changepoints_dict": [None],
    }
    assert defaults.autoregression == {"autoreg_dict": [None]}
    assert defaults.regressors == {}
    assert defaults.uncertainty == {"uncertainty_dict": [None]}
    expected_custom = {
        "silverkite": [SilverkiteForecast()],
        "silverkite_diagnostics": [SilverkiteDiagnostics()],
        "origin_for_time_vars": [None],
        "extra_pred_cols": ["ct1"],  # linear growth
        "fit_algorithm_dict": [linear_fit],
        "min_admissible_value": [None],
        "max_admissible_value": [None],
    }
    skipped_keys = {
        "silverkite": None,
        "silverkite_diagnostics": None
    }
    assert_equal(defaults.custom, expected_custom, ignore_keys=skipped_keys)
    # The defaults hold different instances than the ones passed to this test.
    assert defaults.custom["silverkite"][0] != silverkite
    assert defaults.custom["silverkite_diagnostics"][0] != silverkite_diagnostics

    # Defaults merged with provided components and time properties.
    time_props = {
        "origin_for_time_vars": 2020
    }
    snapshot = dataclasses.replace(model_components_param)  # copy taken before the call
    merged = apply_default_model_components(
        model_components=model_components_param,
        time_properties=time_props)
    assert snapshot == model_components_param  # the input was not mutated
    assert merged.seasonality == model_components_param.seasonality
    assert merged.events == {"daily_event_df_dict": [None]}
    # Mix of default and provided values.
    assert merged.changepoints == {
        "changepoints_dict": {
            "method": "uniform",
            "n_changepoints": 20,
        },
        "seasonality_changepoints_dict": [None],
    }
    assert merged.autoregression == {"autoreg_dict": [None]}
    assert merged.uncertainty == model_components_param.uncertainty
    # Mix of default and provided values.
    assert merged.custom == {
        "silverkite": silverkite,  # the object that was passed in, not a copy
        "silverkite_diagnostics": silverkite_diagnostics,
        "origin_for_time_vars": [time_props["origin_for_time_vars"]],  # taken from time_props
        "extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "max_admissible_value": 4,
        "fit_algorithm_dict": [linear_fit],
        "min_admissible_value": [None],
    }

    # `time_properties` without an "origin_for_time_vars" entry.
    without_origin = apply_default_model_components(
        model_components=model_components_param,
        time_properties={})
    assert without_origin.custom["origin_for_time_vars"] == [None]

    # A user-provided `autoreg_dict` is kept as given.
    provided_autoreg = {
        "lag_dict": {"orders": [7]},
        "agg_lag_dict": {
            "orders_list": [[7, 14, 21]],
            "interval_list": [(7, 14)]},
        "series_na_fill_func": lambda s: s.bfill().ffill()}
    with_autoreg = apply_default_model_components(
        model_components=ModelComponentsParam(
            autoregression={"autoreg_dict": provided_autoreg}))

    resulting_autoreg = with_autoreg.autoregression["autoreg_dict"]
    assert resulting_autoreg["lag_dict"] == {"orders": [7]}
    assert resulting_autoreg["agg_lag_dict"]["orders_list"] == [[7, 14, 21]]
    assert resulting_autoreg["agg_lag_dict"]["interval_list"] == [(7, 14)]
    def __init__(
            self,
            silverkite: SimpleSilverkiteForecast = SimpleSilverkiteForecast(),
            silverkite_diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics(),
            score_func: callable = mean_squared_error,
            coverage: float = None,
            null_model_params: Optional[Dict] = None,
            time_properties: Optional[Dict] = None,
            freq: Optional[str] = None,
            forecast_horizon: Optional[int] = None,
            origin_for_time_vars: Optional[float] = None,
            train_test_thresh: Optional[datetime] = None,
            training_fraction: Optional[float] = None,
            fit_algorithm_dict: Optional[Dict] = None,
            holidays_to_model_separately: Optional[Union[str, List[str]]] = "auto",
            holiday_lookup_countries: Optional[Union[str, List[str]]] = "auto",
            holiday_pre_num_days: int = 2,
            holiday_post_num_days: int = 2,
            holiday_pre_post_num_dict: Optional[Dict] = None,
            daily_event_df_dict: Optional[Dict] = None,
            changepoints_dict: Optional[Dict] = None,
            yearly_seasonality: Union[bool, str, int] = "auto",
            quarterly_seasonality: Union[bool, str, int] = "auto",
            monthly_seasonality: Union[bool, str, int] = "auto",
            weekly_seasonality: Union[bool, str, int] = "auto",
            daily_seasonality: Union[bool, str, int] = "auto",
            max_daily_seas_interaction_order: Optional[int] = None,
            max_weekly_seas_interaction_order: Optional[int] = None,
            autoreg_dict: Optional[Dict] = None,
            seasonality_changepoints_dict: Optional[Dict] = None,
            min_admissible_value: Optional[float] = None,
            max_admissible_value: Optional[float] = None,
            uncertainty_dict: Optional[Dict] = None,
            growth_term: Optional[str] = "linear",
            regressor_cols: Optional[List[str]] = None,
            feature_sets_enabled: Optional[Union[bool, Dict[str, bool]]] = None,
            extra_pred_cols: Optional[List[str]] = None,
            regression_weight_col: Optional[str] = None,
            simulation_based: Optional[bool] = False):
        """Forward the shared arguments to the parent and store every parameter.

        Each constructor argument other than ``silverkite`` and
        ``silverkite_diagnostics`` is assigned, unmodified, to an attribute of
        the same name; those two are handed to the parent constructor.

        NOTE(review): the ``silverkite`` and ``silverkite_diagnostics``
        defaults are evaluated once, when this method is defined, so they are
        shared by every instance created without these arguments.
        """
        # Every subclass of BaseSilverkiteEstimator must call super().__init__.
        super().__init__(
            silverkite=silverkite,
            silverkite_diagnostics=silverkite_diagnostics,
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params,
            uncertainty_dict=uncertainty_dict)

        # All parameters have to be set as attributes so that get_params()
        # works (it is used in grid search). The assignments are independent
        # of each other and are grouped by topic below.

        # Scoring.
        self.score_func = score_func
        self.coverage = coverage
        self.null_model_params = null_model_params

        # Time information.
        self.time_properties = time_properties
        self.freq = freq
        self.forecast_horizon = forecast_horizon
        self.origin_for_time_vars = origin_for_time_vars

        # Training and fitting.
        self.train_test_thresh = train_test_thresh
        self.training_fraction = training_fraction
        self.fit_algorithm_dict = fit_algorithm_dict
        self.regression_weight_col = regression_weight_col

        # Growth and changepoints.
        self.growth_term = growth_term
        self.changepoints_dict = changepoints_dict
        self.seasonality_changepoints_dict = seasonality_changepoints_dict

        # Seasonality.
        self.yearly_seasonality = yearly_seasonality
        self.quarterly_seasonality = quarterly_seasonality
        self.monthly_seasonality = monthly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.max_daily_seas_interaction_order = max_daily_seas_interaction_order
        self.max_weekly_seas_interaction_order = max_weekly_seas_interaction_order

        # Holidays and events.
        self.holidays_to_model_separately = holidays_to_model_separately
        self.holiday_lookup_countries = holiday_lookup_countries
        self.holiday_pre_num_days = holiday_pre_num_days
        self.holiday_post_num_days = holiday_post_num_days
        self.holiday_pre_post_num_dict = holiday_pre_post_num_dict
        self.daily_event_df_dict = daily_event_df_dict

        # Autoregression, regressors and additional model terms.
        self.autoreg_dict = autoreg_dict
        self.regressor_cols = regressor_cols
        self.feature_sets_enabled = feature_sets_enabled
        self.extra_pred_cols = extra_pred_cols

        # Admissible range and uncertainty.
        self.min_admissible_value = min_admissible_value
        self.max_admissible_value = max_admissible_value
        self.uncertainty_dict = uncertainty_dict
        self.simulation_based = simulation_based
Beispiel #6
0
 def _set_silverkite_diagnostics_params(self):
     """Pass the predictor category and column names to the diagnostics object.

     When ``self.silverkite_diagnostics`` is ``None``, a new
     ``SilverkiteDiagnostics`` is created and stored on the instance first.
     """
     diagnostics = self.silverkite_diagnostics
     if diagnostics is None:
         diagnostics = SilverkiteDiagnostics()
         self.silverkite_diagnostics = diagnostics
     diagnostics.set_params(
         self.pred_category,
         self.time_col_,
         self.value_col_)
def test_plot_silverkite_components():
    """Tests plot_silverkite_components function.

    Covers three cases: the default plot of all components, a plot restricted
    to a list of names that contains one unknown name (a warning is expected),
    and a list containing only an unknown name (a ``ValueError`` is expected).
    """
    silverkite_diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics()
    # Dataframe with trend, seasonality and events
    time_col = "ts"
    # value_col name is chosen such that it contains keywords "ct" and "sin"
    # so that we can test patterns specified for each component work correctly
    value_col = "basin_impact"
    df = pd.DataFrame({
        time_col: [
            datetime.datetime(2018, 1, 1),
            datetime.datetime(2018, 1, 2),
            datetime.datetime(2018, 1, 3),
            datetime.datetime(2018, 1, 4),
            datetime.datetime(2018, 1, 5)
        ],
        value_col: [1, 2, 3, 4, 5],
    })
    feature_df = pd.DataFrame({
        # Trend columns: growth, changepoints and interactions (total 5 columns)
        "ct1":
        np.array([1.0, 1.0, 1.0, 1.0, 1.0]),
        "ct1:tod":
        np.array([1.0, 1.0, 1.0, 1.0, 1.0]),
        "ct_sqrt":
        np.array([1.0, 1.0, 1.0, 1.0, 1.0]),
        "changepoint0_2018_01_02_00":
        np.array([1.0, 1.0, 1.0, 1.0, 1.0]),
        "changepoint1_2018_01_04_00":
        np.array([1.0, 1.0, 1.0, 1.0, 1.0]),
        # Weekly seasonality with interaction (total 4 columns)
        "sin1_tow_weekly":
        np.array([2.0, 2.0, 2.0, 2.0, 2.0]),
        "cos1_tow_weekly":
        np.array([2.0, 2.0, 2.0, 2.0, 2.0]),
        "is_weekend[T.True]:sin1_tow_weekly":
        np.array([2.0, 2.0, 2.0, 2.0, 2.0]),
        "is_weekend[T.True]:cos1_tow_weekly":
        np.array([2.0, 2.0, 2.0, 2.0, 2.0]),
        # Yearly seasonality (total 6 columns)
        "sin1_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        "cos1_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        "sin2_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        "cos2_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        "sin3_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        "cos3_ct1_yearly":
        np.array([3.0, 3.0, 3.0, 3.0, 3.0]),
        # Holiday with pre and post effect (1 where the date and event match)
        # e.g. New Years Day is 1 at 1st January, 0 rest of the days
        "Q('events_New Years Day')[T.event]":
        np.array([1.0, 0.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_minus_1')[T.event]":
        np.array([0.0, 0.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_minus_2')[T.event]":
        np.array([0.0, 0.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_plus_1')[T.event]":
        np.array([0.0, 1.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_plus_2')[T.event]":
        np.array([0.0, 0.0, 1.0, 0.0, 0.0]),
    })
    components = silverkite_diagnostics.get_silverkite_components(
        df, time_col, value_col, feature_df)

    # Check plot_silverkite_components with defaults
    fig = silverkite_diagnostics.plot_silverkite_components(components)
    assert len(fig.data) == 5 + 2  # 2 changepoints
    assert [trace.name for trace in fig.data] == (
        list(components.columns)[1:-1] + ["trend change point"] * 2)

    # changepoints do not create separate subplots
    assert fig.layout.height == (len(fig.data) - 2) * 350
    assert fig.layout.showlegend is True  # legend for changepoints
    assert fig.layout.title["text"] == "Component plots"

    assert fig.layout.xaxis.title["text"] == time_col
    assert fig.layout.xaxis2.title["text"] == time_col
    assert fig.layout.xaxis3.title["text"] == "Day of week"
    assert fig.layout.xaxis4.title["text"] == "Time of year"
    assert fig.layout.xaxis5.title["text"] == time_col

    assert fig.layout.yaxis.title["text"] == value_col
    assert fig.layout.yaxis2.title["text"] == "trend"
    assert fig.layout.yaxis3.title["text"] == "weekly"
    assert fig.layout.yaxis4.title["text"] == "yearly"
    assert fig.layout.yaxis5.title["text"] == "events"

    # Check plot_silverkite_components with provided component list and warnings
    names = ["YEARLY_SEASONALITY", value_col, "DUMMY"]
    title = "Component plot without trend and weekly seasonality"
    # Only the call expected to warn is wrapped; the assertions on the result
    # run afterwards so a failing assertion is reported on its own.
    with pytest.warns(Warning) as record:
        fig = silverkite_diagnostics.plot_silverkite_components(
            components,
            names=names,
            title=title)

    expected_length = 2
    assert len(fig.data) == expected_length
    assert [trace.name for trace in fig.data] == [value_col, "YEARLY_SEASONALITY"]

    assert fig.layout.height == expected_length * 350
    assert fig.layout.showlegend is True
    assert fig.layout.title["text"] == title

    assert fig.layout.xaxis.title["text"] == time_col
    assert fig.layout.xaxis2.title["text"] == "Time of year"

    assert fig.layout.yaxis.title["text"] == value_col
    assert fig.layout.yaxis2.title["text"] == "yearly"
    expected_warning = (
        "The following components have not been specified in the model: "
        "{'DUMMY'}, plotting the rest.")
    assert expected_warning in record[0].message.args[0]

    # Check plot_silverkite_components with exception
    names = ["DUMMY"]
    with pytest.raises(
            ValueError,
            match="None of the provided components have been specified in the model."):
        silverkite_diagnostics.plot_silverkite_components(
            components,
            names=names)
def test_get_silverkite_components():
    """Tests get_silverkite_components function.

    Builds a five-row input with constant-valued feature columns, checks the
    returned component dataframe against a hand-built expectation, and then
    checks the two ``ValueError`` messages for invalid ``feature_df`` input.
    """
    diagnostics = SilverkiteDiagnostics()
    n_rows = 5

    def constant(value):
        """Returns a new length-``n_rows`` float array filled with ``value``."""
        return np.full(n_rows, value)

    time_col = "ts"
    # The value column name deliberately contains the keywords "ct" and "sin",
    # to check that the patterns used for each component work correctly.
    value_col = "basin_impact"
    df = pd.DataFrame({
        time_col: [datetime.datetime(2018, 1, day) for day in range(1, n_rows + 1)],
        value_col: [1, 2, 3, 4, 5],
        "dummy_col": [0, 0, 0, 0, 0],
    })

    feature_df = pd.DataFrame({
        # 5 trend columns: growth, changepoints and interactions.
        "ct1": constant(1.0),
        "ct1:tod": constant(1.0),
        "ct_sqrt": constant(1.0),
        "changepoint0_2018_01_02_00": constant(1.0),
        "changepoint1_2018_01_04_00": constant(1.0),
        # 4 columns named "*_tow_weekly", two of them interactions.
        "sin1_tow_weekly": constant(2.0),
        "cos1_tow_weekly": constant(2.0),
        "is_weekend[T.True]:sin1_tow_weekly": constant(2.0),
        "is_weekend[T.True]:cos1_tow_weekly": constant(2.0),
        # 6 columns named "*_yearly".
        "sin1_ct1_yearly": constant(3.0),
        "cos1_ct1_yearly": constant(3.0),
        "sin2_ct1_yearly": constant(3.0),
        "cos2_ct1_yearly": constant(3.0),
        "sin3_ct1_yearly": constant(3.0),
        "cos3_ct1_yearly": constant(3.0),
        # Holiday with pre and post effect: 1 where the date and event match,
        # e.g. New Years Day is 1 on 1st January and 0 on the other days.
        "Q('events_New Years Day')[T.event]": np.array([1.0, 0.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_minus_1')[T.event]": constant(0.0),
        "Q('events_New Years Day_minus_2')[T.event]": constant(0.0),
        "Q('events_New Years Day_plus_1')[T.event]": np.array([0.0, 1.0, 0.0, 0.0, 0.0]),
        "Q('events_New Years Day_plus_2')[T.event]": np.array([0.0, 0.0, 1.0, 0.0, 0.0]),
    })

    result = diagnostics.get_silverkite_components(
        df, time_col, value_col, feature_df)
    # Each expected component is the number of columns in its group times the
    # constant value those columns hold.
    expected = pd.DataFrame({
        time_col: df[time_col],
        value_col: df[value_col],
        "trend": 5 * constant(1.0),
        "WEEKLY_SEASONALITY": 4 * constant(2.0),
        "YEARLY_SEASONALITY": 6 * constant(3.0),
        cst.EVENT_PREFIX: np.array([1.0, 1.0, 1.0, 0.0, 0.0]),
        "trend_changepoints": np.array([0, 1, 0, 1, 0])
    })
    assert_frame_equal(result, expected)

    # Error message for an empty feature dataframe.
    empty_features = pd.DataFrame()
    with pytest.raises(ValueError, match="feature_df must be non-empty"):
        diagnostics.get_silverkite_components(
            df, time_col, value_col, feature_df=empty_features)

    # Error message for a feature dataframe with a different number of rows.
    short_features = pd.DataFrame({"ts": [1, 2, 3]})
    with pytest.raises(
            ValueError,
            match="df and feature_df must have same number of rows."):
        diagnostics.get_silverkite_components(
            df,
            time_col,
            value_col,
            feature_df=short_features)
def test_group_silverkite_seas_components():
    """Tests group_silverkite_seas_components.

    Each case builds a two-column dataframe (timestamp plus one seasonality
    column taken from a time feature), runs it through
    ``group_silverkite_seas_components`` and compares the result with a
    hand-built expectation.
    """
    diagnostics = SilverkiteDiagnostics()

    def grouped(start, end, freq, seasonality_col, time_feature):
        """Runs the grouping on a date range whose seasonality column is ``time_feature``."""
        timestamps = pd.date_range(start=start, end=end, freq=freq).tolist()
        time_features = build_time_features_df(timestamps, conti_year_origin=2018)
        frame = pd.DataFrame({
            "ts": time_features["datetime"],
            seasonality_col: time_features[time_feature]
        })
        return diagnostics.group_silverkite_seas_components(frame)

    # Daily
    hours = np.arange(24.0)
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-07", "H", "DAILY_SEASONALITY", "hour"),
        pd.DataFrame({"Hour of day": hours, "daily": hours}))

    # Weekly
    weekdays = np.arange(7.0)
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-20", "D", "WEEKLY_SEASONALITY", "tow"),
        pd.DataFrame({"Day of week": weekdays, "weekly": weekdays}))

    # Monthly
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-31", "D", "MONTHLY_SEASONALITY", "dom"),
        pd.DataFrame({
            "Time of month": np.arange(31.0) / 31,
            "monthly": np.arange(1.0, 32.0),
        }))

    # Quarterly (92 day quarters)
    long_quarter = np.arange(92.0) / 92
    assert_frame_equal(
        grouped("2018-07-01", "2018-12-31", "D", "QUARTERLY_SEASONALITY", "toq"),
        pd.DataFrame({"Time of quarter": long_quarter, "quarterly": long_quarter}))

    # Quarterly (90 day quarter)
    short_quarter = np.arange(90.0) / 90
    assert_frame_equal(
        grouped("2018-01-01", "2018-03-31", "D", "QUARTERLY_SEASONALITY", "toq"),
        pd.DataFrame({"Time of quarter": short_quarter, "quarterly": short_quarter}))

    # Yearly (non-leap years)
    year_fraction = np.arange(365.0) / 365
    assert_frame_equal(
        grouped("2018-01-01", "2019-12-31", "D", "YEARLY_SEASONALITY", "toy"),
        pd.DataFrame({"Time of year": year_fraction, "yearly": year_fraction}))
Beispiel #10
0
def apply_default_model_components(model_components=None,
                                   time_properties=None):
    """Returns ``model_components`` with a default filled in for every
    attribute the caller left unset.

    Each attribute is merged through ``update_dictionary`` with the
    user-provided dictionary taking precedence over the defaults defined
    in this function.

    Parameters
    ----------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None, default None
        Configuration of model growth, seasonality, events, etc.
        See :func:`~greykite.framework.templates.silverkite_templates.silverkite_template` for details.
        If None, a new ``ModelComponentsParam()`` is used as the starting point.
    time_properties : `dict` [`str`, `any`] or None, default None
        Time properties dictionary (likely produced by
        `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
        with keys:

        ``"period"`` : `int`
            Period of each observation (i.e. minimum time between observations, in seconds).
        ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
            ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
        ``"num_training_points"`` : `int`
            Number of observations for training.
        ``"num_training_days"`` : `int`
            Number of days for training.
        ``"start_year"`` : `int`
            Start year of the training period.
        ``"end_year"`` : `int`
            End year of the forecast period.
        ``"origin_for_time_vars"`` : `float`
            Continuous time representation of the first date in ``df``.

        Only ``"origin_for_time_vars"`` is read by this function.

    Returns
    -------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam`
        A new object (not the one passed in) carrying the provided
        values with defaults applied.
    """
    # Start from a fresh instance, or from a shallow copy of the input so
    # that the attribute reassignments below never touch the caller's object.
    components = (
        ModelComponentsParam()
        if model_components is None
        else dataclasses.replace(model_components))

    def _merge_defaults(attribute, defaults):
        """Overlays the current value of ``attribute`` on ``defaults``
        and stores the merged dictionary back on ``components``."""
        merged = update_dictionary(
            defaults,
            overwrite_dict=getattr(components, attribute),
            # Passed for every attribute; presumably makes `update_dictionary`
            # reject keys that are absent from `defaults` -- confirm in that helper.
            allow_unknown_keys=False)
        setattr(components, attribute, merged)

    # Fourier series terms used when the caller does not specify any.
    fourier_terms = pd.DataFrame({
        "name": ["tod", "tow", "tom", "toq", "toy"],
        "period": [24.0, 7.0, 1.0, 1.0, 1.0],
        "order": [3, 3, 1, 1, 5],
        "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"],
    })
    _merge_defaults("seasonality", {"fs_components_df": [fourier_terms]})

    # `growth` has no recognized keys here: it must be empty.
    # Growth terms are passed via `extra_pred_cols` instead.
    _merge_defaults("growth", {})

    _merge_defaults("events", {"daily_event_df_dict": [None]})

    # "changepoint_detector" is deliberately absent from the defaults: it is
    # not allowed, to prevent leaking future information into the past.
    # Pass `changepoints_dict` with method="auto" for automatic detection.
    _merge_defaults("changepoints", {
        "changepoints_dict": [None],
        "seasonality_changepoints_dict": [None],
    })

    _merge_defaults("autoregression", {"autoreg_dict": [None]})

    _merge_defaults("regressors", {})

    _merge_defaults("lagged_regressors", {"lagged_regressor_dict": [None]})

    _merge_defaults("uncertainty", {"uncertainty_dict": [None]})

    # The same origin is used for every split, taken from `time_properties`
    # when available. To use the first date of each training split instead,
    # set "origin_for_time_vars" to `None` in `model_components.custom`.
    origin_for_time_vars = (
        None
        if time_properties is None
        else time_properties.get("origin_for_time_vars"))

    _merge_defaults("custom", {
        # NB: sklearn creates a copy in grid search
        "silverkite": [SilverkiteForecast()],
        "silverkite_diagnostics": [SilverkiteDiagnostics()],
        "origin_for_time_vars": [origin_for_time_vars],
        "extra_pred_cols": ["ct1"],  # linear growth
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
        "max_admissible_value": [None],
    })

    # Sets to {} if None; applied to each item when
    # `hyperparameter_override` is a list of dictionaries.
    components.hyperparameter_override = update_dictionaries(
        {}, overwrite_dicts=components.hyperparameter_override)

    return components
Beispiel #11
0
    def __init__(
            self,
            silverkite: SilverkiteForecast = SilverkiteForecast(),
            silverkite_diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics(),
            score_func=mean_squared_error,
            coverage=None,
            null_model_params=None,
            origin_for_time_vars=None,
            extra_pred_cols=None,
            train_test_thresh=None,
            training_fraction=None,
            fit_algorithm_dict=None,
            daily_event_df_dict=None,
            fs_components_df=pd.DataFrame({
                "name": ["tod", "tow", "conti_year"],
                "period": [24.0, 7.0, 1.0],
                "order": [3, 3, 5],
                "seas_names": ["daily", "weekly", "yearly"],
            }),
            autoreg_dict=None,
            lagged_regressor_dict=None,
            changepoints_dict=None,
            seasonality_changepoints_dict=None,
            changepoint_detector=None,
            min_admissible_value=None,
            max_admissible_value=None,
            uncertainty_dict=None,
            normalize_method=None,
            adjust_anomalous_dict=None,
            impute_dict=None,
            regression_weight_col=None,
            forecast_horizon=None,
            simulation_based=False):
        """Stores every constructor argument on the instance, unchanged,
        under an attribute of the same name, then calls
        ``self.validate_inputs()``.

        NOTE: the default values of ``silverkite``,
        ``silverkite_diagnostics`` and ``fs_components_df`` are built once,
        when this method is defined, so instances created without these
        arguments all reference the same default objects.
        """
        # Every subclass of BaseSilverkiteEstimator must call super().__init__.
        super().__init__(
            silverkite=silverkite,
            silverkite_diagnostics=silverkite_diagnostics,
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params,
            uncertainty_dict=uncertainty_dict)

        # All parameters are assigned explicitly below, including those
        # already forwarded to the parent, to ensure get_params() works
        # (used in grid search).

        # scoring and null model
        self.score_func = score_func
        self.coverage = coverage
        self.null_model_params = null_model_params

        # time origin and extra predictor columns
        self.origin_for_time_vars = origin_for_time_vars
        self.extra_pred_cols = extra_pred_cols

        # train/test split and fit algorithm
        self.train_test_thresh = train_test_thresh
        self.training_fraction = training_fraction
        self.fit_algorithm_dict = fit_algorithm_dict

        # events and Fourier series components
        self.daily_event_df_dict = daily_event_df_dict
        self.fs_components_df = fs_components_df

        # autoregression and lagged regressors
        self.autoreg_dict = autoreg_dict
        self.lagged_regressor_dict = lagged_regressor_dict

        # changepoints
        self.changepoints_dict = changepoints_dict
        self.seasonality_changepoints_dict = seasonality_changepoints_dict
        self.changepoint_detector = changepoint_detector

        # admissible value bounds and uncertainty
        self.min_admissible_value = min_admissible_value
        self.max_admissible_value = max_admissible_value
        self.uncertainty_dict = uncertainty_dict

        # normalization, anomaly adjustment, imputation, regression weights
        self.normalize_method = normalize_method
        self.adjust_anomalous_dict = adjust_anomalous_dict
        self.impute_dict = impute_dict
        self.regression_weight_col = regression_weight_col

        # forecast horizon and simulation flag
        self.forecast_horizon = forecast_horizon
        self.simulation_based = simulation_based

        # Runs last, after every attribute above has been set.
        self.validate_inputs()