Example #1
0
    def __init__(self,
                 silverkite: SilverkiteForecast = SilverkiteForecast(),
                 silverkite_diagnostics:
                 Optional[SilverkiteDiagnostics] = None,
                 score_func: callable = mean_squared_error,
                 coverage: float = None,
                 null_model_params: Optional[Dict] = None,
                 uncertainty_dict: Optional[Dict] = None):
        """Stores the constructor arguments and clears all fitted state.

        Parameters
        ----------
        silverkite : `SilverkiteForecast`, optional
            Stored unchanged as ``self.silverkite``.
        silverkite_diagnostics : `SilverkiteDiagnostics` or None, default None
            Stored unchanged as ``self.silverkite_diagnostics``.
            NOTE(review): assumes the enclosing class creates a
            ``SilverkiteDiagnostics`` on demand when this is None (as
            ``_set_silverkite_diagnostics_params`` does) -- confirm.
        score_func : callable, default ``mean_squared_error``
            Forwarded to ``super().__init__``.
        coverage : `float` or None, default None
            Forwarded to ``super().__init__``.
        null_model_params : `dict` or None, default None
            Forwarded to ``super().__init__``.
        uncertainty_dict : `dict` or None, default None
            Stored unchanged as ``self.uncertainty_dict``.
        """
        # initializes null model
        super().__init__(score_func=score_func,
                         coverage=coverage,
                         null_model_params=null_model_params)

        # required in subclasses __init__
        self.uncertainty_dict = uncertainty_dict

        self.silverkite: SilverkiteForecast = silverkite
        # The default is None instead of `SilverkiteDiagnostics()`: a default
        # instance is built once, when the function is defined, and would be
        # shared by every estimator relying on the default, although it is
        # configured per fitted model.
        # NOTE(review): the `silverkite` default above is shared in the same
        # way; left unchanged because it cannot be confirmed from here whether
        # that object holds per-model state.
        self.silverkite_diagnostics: Optional[SilverkiteDiagnostics] = silverkite_diagnostics

        # set by `fit`
        # fitted model in dictionary format returned from
        # the `forecast_silverkite` function
        self.model_dict = None
        self.pred_cols = None
        self.feature_cols = None
        self.df = None
        self.coef_ = None
        # Predictor category, lazy initialization as None.
        # Will be updated in property function pred_category when needed.
        self._pred_category = None
        self.extra_pred_cols = None  # all silverkite estimators should support this.

        # set by the predict method
        self.forecast = None
        # set by the summary method
        self.model_summary = None
Example #2
0
def silverkite_diagnostics():
    """Returns a newly constructed ``SilverkiteDiagnostics``.

    NOTE(review): presumably registered as a pytest fixture (the tests take a
    ``silverkite_diagnostics`` argument); the decorator is not visible here.
    """
    diagnostics = SilverkiteDiagnostics()
    return diagnostics
Example #3
0
def test_get_silverkite_hyperparameter_grid(model_components_param, silverkite, silverkite_diagnostics):
    """Tests ``SilverkiteTemplate.get_hyperparameter_grid`` in three stages.

    1. Default config: the grid matches the expected defaults, and the
       silverkite / diagnostics entries do not compare equal to the fixtures.
    2. With ``model_components_param`` and ``time_properties`` set: scalar
       values appear wrapped in single-element lists.
    3. With ``hyperparameter_override`` set to a list: the result is a list of
       grids, one per override entry.

    Parameters are assumed to be pytest fixtures -- their definitions are not
    all visible here.
    """
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [SilverkiteForecast()],
        "estimator__silverkite_diagnostics": [SilverkiteDiagnostics()],
        "estimator__origin_for_time_vars": [None],
        "estimator__extra_pred_cols": [["ct1"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None}],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [pd.DataFrame({
            "name": ["tod", "tow", "tom", "toq", "toy"],
            "period": [24.0, 7.0, 1.0, 1.0, 1.0],
            "order": [3, 3, 1, 1, 5],
            "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [None],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [None],
        "estimator__uncertainty_dict": [None],
    }
    # The two object-valued entries are excluded from the comparison and
    # checked separately with `!=` below.
    assert_equal(hyperparameter_grid, expected_grid, ignore_keys={"estimator__silverkite": None, "estimator__silverkite_diagnostics": None})
    # NOTE(review): presumably these rely on identity-based comparison to show
    # that the default grid holds different instances from the fixtures.
    assert hyperparameter_grid["estimator__silverkite"][0] != silverkite
    assert hyperparameter_grid["estimator__silverkite_diagnostics"][0] != silverkite_diagnostics

    # Tests auto-list conversion
    template.config.model_components_param = model_components_param
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [silverkite],
        "estimator__silverkite_diagnostics": [silverkite_diagnostics],
        "estimator__origin_for_time_vars": [2020],
        "estimator__extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [None],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [{
            "method": "uniform",
            "n_changepoints": 20,
        }],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [4],
        "estimator__uncertainty_dict": [{
            "uncertainty_method": "simple_conditional_residuals"
        }],
    }
    assert_equal(hyperparameter_grid, expected_grid)

    # Tests hyperparameter_override
    # The override list mixes a populated dict, an empty dict, a second
    # populated dict and None; one grid per entry is expected below.
    template.config.model_components_param.hyperparameter_override = [
        {
            "input__response__null__max_frac": 0.1,
            "estimator__min_admissible_value": [2],
            "estimator__extra_pred_cols": ["override_estimator__extra_pred_cols"],
        },
        {},
        {
            "estimator__extra_pred_cols": ["val1", "val2"],
            "estimator__origin_for_time_vars": [2019],
        },
        None
    ]
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    # `expected_grid` already holds [2020] for this key from the stage above;
    # the assignment restates the baseline the overrides are applied to.
    expected_grid["estimator__origin_for_time_vars"] = [2020]
    # Shallow copies: only top-level keys are reassigned, so the baseline
    # `expected_grid` is not affected.
    updated_grid1 = expected_grid.copy()
    updated_grid1["input__response__null__max_frac"] = [0.1]
    updated_grid1["estimator__min_admissible_value"] = [2]
    updated_grid1["estimator__extra_pred_cols"] = [["override_estimator__extra_pred_cols"]]
    updated_grid2 = expected_grid.copy()
    updated_grid2["estimator__extra_pred_cols"] = [["val1", "val2"]]
    updated_grid2["estimator__origin_for_time_vars"] = [2019]
    # The empty-dict and None overrides are both expected to yield the
    # unmodified baseline grid.
    expected_grid = [
        updated_grid1,
        expected_grid,
        updated_grid2,
        expected_grid]
    assert_equal(hyperparameter_grid, expected_grid)
Example #4
0
def test_apply_default_model_components(model_components_param, silverkite, silverkite_diagnostics):
    """Tests ``apply_default_model_components``.

    Covers, in order: the defaults returned when called with no arguments;
    merging of user-provided components with defaults (and that the input is
    not mutated); ``time_properties`` lacking ``"origin_for_time_vars"``; and
    pass-through of a user-provided ``autoreg_dict``.

    Parameters are assumed to be pytest fixtures -- their definitions are not
    all visible here.
    """
    model_components = apply_default_model_components()
    assert_equal(model_components.seasonality, {
        "fs_components_df": [pd.DataFrame({
            "name": ["tod", "tow", "tom", "toq", "toy"],
            "period": [24.0, 7.0, 1.0, 1.0, 1.0],
            "order": [3, 3, 1, 1, 5],
            "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})],
    })
    assert model_components.growth == {}
    assert model_components.events == {
        "daily_event_df_dict": [None],
    }
    assert model_components.changepoints == {
        "changepoints_dict": [None],
        "seasonality_changepoints_dict": [None],
    }
    assert model_components.autoregression == {
        "autoreg_dict": [None],
    }
    assert model_components.regressors == {}
    assert model_components.uncertainty == {
        "uncertainty_dict": [None],
    }
    # The object-valued entries are excluded here and checked with `!=` below.
    assert_equal(model_components.custom, {
        "silverkite": [SilverkiteForecast()],
        "silverkite_diagnostics": [SilverkiteDiagnostics()],
        "origin_for_time_vars": [None],
        "extra_pred_cols": ["ct1"],  # linear growth
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
        "max_admissible_value": [None],
    }, ignore_keys={
        "silverkite": None,
        "silverkite_diagnostics": None
    })
    assert model_components.custom["silverkite"][0] != silverkite  # a different instance was created
    assert model_components.custom["silverkite_diagnostics"][0] != silverkite_diagnostics

    # overwrite some parameters
    time_properties = {
        "origin_for_time_vars": 2020
    }
    original_components = dataclasses.replace(model_components_param)  # creates a copy
    updated_components = apply_default_model_components(
        model_components=model_components_param,
        time_properties=time_properties)
    assert original_components == model_components_param  # not mutated by the function
    assert updated_components.seasonality == model_components_param.seasonality
    assert updated_components.events == {
        "daily_event_df_dict": [None],
    }
    # User-provided values are expected as given (not wrapped in a list),
    # while defaulted values are wrapped in a single-element list.
    assert updated_components.changepoints == {
        "changepoints_dict": {  # combination of defaults and provided params
            "method": "uniform",
            "n_changepoints": 20,
        },
        "seasonality_changepoints_dict": [None],
    }
    assert updated_components.autoregression == {"autoreg_dict": [None]}
    assert updated_components.uncertainty == model_components_param.uncertainty
    assert updated_components.custom == {  # combination of defaults and provided params
        "silverkite": silverkite,  # the same object that was passed in (not a copy)
        "silverkite_diagnostics": silverkite_diagnostics,
        "origin_for_time_vars": [time_properties["origin_for_time_vars"]],  # from time_properties
        "extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "max_admissible_value": 4,
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
    }

    # `time_properties` without the "origin_for_time_vars" key:
    # the origin falls back to [None].
    updated_components = apply_default_model_components(
        model_components=model_components_param,
        time_properties={})
    assert updated_components.custom["origin_for_time_vars"] == [None]

    # A user-provided `autoreg_dict` (including a callable entry) is expected
    # to come back with its lag settings intact.
    updated_components = apply_default_model_components(
        model_components=ModelComponentsParam(
            autoregression={
                "autoreg_dict": {
                    "lag_dict": {"orders": [7]},
                    "agg_lag_dict": {
                        "orders_list": [[7, 7*2, 7*3]],
                        "interval_list": [(7, 7*2)]},
                    "series_na_fill_func": lambda s: s.bfill().ffill()}
            })
    )

    autoreg_dict = updated_components.autoregression["autoreg_dict"]
    assert autoreg_dict["lag_dict"] == {"orders": [7]}
    assert autoreg_dict["agg_lag_dict"]["orders_list"] == [[7, 14, 21]]
    assert autoreg_dict["agg_lag_dict"]["interval_list"] == [(7, 14)]
    def __init__(
            self,
            silverkite: SimpleSilverkiteForecast = SimpleSilverkiteForecast(),
            silverkite_diagnostics:
        Optional[SilverkiteDiagnostics] = None,
            score_func: callable = mean_squared_error,
            coverage: float = None,
            null_model_params: Optional[Dict] = None,
            time_properties: Optional[Dict] = None,
            freq: Optional[str] = None,
            forecast_horizon: Optional[int] = None,
            origin_for_time_vars: Optional[float] = None,
            train_test_thresh: Optional[datetime] = None,
            training_fraction: Optional[float] = None,
            fit_algorithm_dict: Optional[Dict] = None,
            holidays_to_model_separately: Optional[Union[str,
                                                         List[str]]] = "auto",
            holiday_lookup_countries: Optional[Union[str, List[str]]] = "auto",
            holiday_pre_num_days: int = 2,
            holiday_post_num_days: int = 2,
            holiday_pre_post_num_dict: Optional[Dict] = None,
            daily_event_df_dict: Optional[Dict] = None,
            changepoints_dict: Optional[Dict] = None,
            yearly_seasonality: Union[bool, str, int] = "auto",
            quarterly_seasonality: Union[bool, str, int] = "auto",
            monthly_seasonality: Union[bool, str, int] = "auto",
            weekly_seasonality: Union[bool, str, int] = "auto",
            daily_seasonality: Union[bool, str, int] = "auto",
            max_daily_seas_interaction_order: Optional[int] = None,
            max_weekly_seas_interaction_order: Optional[int] = None,
            autoreg_dict: Optional[Dict] = None,
            seasonality_changepoints_dict: Optional[Dict] = None,
            min_admissible_value: Optional[float] = None,
            max_admissible_value: Optional[float] = None,
            uncertainty_dict: Optional[Dict] = None,
            growth_term: Optional[str] = "linear",
            regressor_cols: Optional[List[str]] = None,
            feature_sets_enabled: Optional[Union[bool, Dict[str,
                                                            bool]]] = None,
            extra_pred_cols: Optional[List[str]] = None,
            regression_weight_col: Optional[str] = None,
            simulation_based: Optional[bool] = False):
        """Stores every constructor argument unchanged on the instance.

        ``silverkite``, ``silverkite_diagnostics``, ``score_func``,
        ``coverage``, ``null_model_params`` and ``uncertainty_dict`` are
        forwarded to ``super().__init__``; every other argument is assigned
        to the attribute of the same name without validation or conversion.

        ``silverkite_diagnostics`` defaults to None rather than a
        ``SilverkiteDiagnostics()`` instance: a default instance is built
        once, when the function is defined, and would be shared by every
        estimator relying on the default.
        NOTE(review): assumes the parent class creates the diagnostics object
        on demand when it is None (``BaseSilverkiteEstimator`` does so in
        ``_set_silverkite_diagnostics_params``) -- confirm the parent here.
        NOTE(review): the ``silverkite`` default is shared in the same way;
        left unchanged because it cannot be confirmed from here whether that
        object holds per-model state.
        """
        # every subclass of BaseSilverkiteEstimator must call super().__init__
        super().__init__(silverkite=silverkite,
                         silverkite_diagnostics=silverkite_diagnostics,
                         score_func=score_func,
                         coverage=coverage,
                         null_model_params=null_model_params,
                         uncertainty_dict=uncertainty_dict)

        # necessary to set parameters, to ensure get_params() works
        # (used in grid search)
        self.score_func = score_func
        self.coverage = coverage
        self.null_model_params = null_model_params
        self.time_properties = time_properties
        self.freq = freq
        self.forecast_horizon = forecast_horizon
        self.origin_for_time_vars = origin_for_time_vars
        self.train_test_thresh = train_test_thresh
        self.training_fraction = training_fraction
        self.fit_algorithm_dict = fit_algorithm_dict
        self.holidays_to_model_separately = holidays_to_model_separately
        self.holiday_lookup_countries = holiday_lookup_countries
        self.holiday_pre_num_days = holiday_pre_num_days
        self.holiday_post_num_days = holiday_post_num_days
        self.holiday_pre_post_num_dict = holiday_pre_post_num_dict
        self.daily_event_df_dict = daily_event_df_dict
        self.changepoints_dict = changepoints_dict
        self.yearly_seasonality = yearly_seasonality
        self.quarterly_seasonality = quarterly_seasonality
        self.monthly_seasonality = monthly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.max_daily_seas_interaction_order = max_daily_seas_interaction_order
        self.max_weekly_seas_interaction_order = max_weekly_seas_interaction_order
        self.autoreg_dict = autoreg_dict
        self.seasonality_changepoints_dict = seasonality_changepoints_dict
        self.min_admissible_value = min_admissible_value
        self.max_admissible_value = max_admissible_value
        self.uncertainty_dict = uncertainty_dict
        self.growth_term = growth_term
        self.regressor_cols = regressor_cols
        self.feature_sets_enabled = feature_sets_enabled
        self.extra_pred_cols = extra_pred_cols
        self.regression_weight_col = regression_weight_col
        self.simulation_based = simulation_based
Example #6
0
class BaseSilverkiteEstimator(BaseForecastEstimator):
    """A base class for forecast estimators that fit using
    `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`.

    Notes
    -----
    Allows estimators that fit using
    `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`
    to share the same functions for input data validation,
    fit postprocessing, predict, summary, plot_components, etc.

    Subclasses should:

        - Implement their own ``__init__`` that uses a superset of the parameters here.
        - Implement their own ``fit``, with this sequence of steps:

            - calls ``super().fit``
            - calls ``SilverkiteForecast.forecast`` or ``SimpleSilverkiteForecast.forecast_simple`` and stores the result in ``self.model_dict``
            - calls ``super().finish_fit``

    Uses ``coverage`` to set prediction band width. Even though
    coverage is not needed by ``forecast_silverkite``, it is included
    in every ``BaseForecastEstimator`` to be used universally for
    forecast evaluation.

    Therefore, ``uncertainty_dict`` must be consistent with ``coverage``
    if provided as a dictionary. If ``uncertainty_dict`` is None or
    "auto", an appropriate default value is set, according to ``coverage``.

    Parameters
    ----------
    silverkite : Class or a derived class of `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast`, optional
        The silverkite algorithm instance used for forecasting.
        Stored as the ``silverkite`` attribute.
    silverkite_diagnostics : Class or a derived class of `~greykite.algo.forecast.silverkite.silverkite_diagnostics.SilverkiteDiagnostics`, optional
        The silverkite class used for plotting and generating model summary.
        Stored as the ``silverkite_diagnostics`` attribute. If it is None when
        needed, a ``SilverkiteDiagnostics`` instance is created.
    score_func : callable, optional, default mean_squared_error
        See `~greykite.sklearn.estimator.base_forecast_estimator.BaseForecastEstimator`.
    coverage : `float` between [0.0, 1.0] or None, optional
        See `~greykite.sklearn.estimator.base_forecast_estimator.BaseForecastEstimator`.
    null_model_params : `dict`, optional
        Dictionary with arguments to define DummyRegressor null model, default is `None`.
        See `~greykite.sklearn.estimator.base_forecast_estimator.BaseForecastEstimator`.
    uncertainty_dict : `dict` or `str` or None, optional
        How to fit the uncertainty model.
        See `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`.
        Note that this is allowed to be "auto". If None or "auto", will be set to
        a default value by ``coverage`` before calling ``forecast_silverkite``.

    Attributes
    ----------
    silverkite : Class or a derived class of `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast`
        The silverkite algorithm instance used for forecasting
    silverkite_diagnostics : Class or a derived class of `~greykite.algo.forecast.silverkite.silverkite_diagnostics.SilverkiteDiagnostics`
        The silverkite class used for plotting and generating model summary.
    model_dict : `dict` or None
        A dict with fitted model and its attributes.
        The output of `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`.
    pred_cols : `list` [`str`] or None
        Names of the features used in the model.
    feature_cols : `list` [`str`] or None
        Column names of the patsy design matrix built by
        `~greykite.algo.common.ml_models.design_mat_from_formula`.
    df : `pandas.DataFrame` or None
        The training data used to fit the model.
    coef_ : `pandas.DataFrame` or None
        Estimated coefficient matrix for the model.
        Not available for ``random forest`` and ``gradient boosting`` methods and
        set to the default value `None`.
    _pred_category : `dict` or None
        A dictionary with keys being the predictor category and
        values being the predictors belonging to the category.
        For details, see
        `~greykite.sklearn.estimator.base_silverkite_estimator.BaseSilverkiteEstimator.pred_category`.
    extra_pred_cols : `list` or None
        User provided extra predictor names, for details, see
        `~greykite.sklearn.estimator.simple_silverkite_estimator.SimpleSilverkiteEstimator`
        or
        `~greykite.sklearn.estimator.silverkite_estimator.SilverkiteEstimator`.
    forecast : `pandas.DataFrame` or None
        Output of ``predict_silverkite``, set by ``self.predict``.
    model_summary : `class` or `None`
        The `~greykite.algo.common.model_summary.ModelSummary` class.

    See Also
    --------
    `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`
        Function performing the fit and predict.

    Notes
    -----
    The subclasses will pass ``fs_components_df`` to ``forecast_silverkite``. The model terms
    it creates internally are used to generate the component plots.

        - `~greykite.common.features.timeseries_features.fourier_series_multi_fcn` uses
          ``fs_components_df["name"]`` (e.g. ``tod``, ``tow``) to build the fourier series
          and to create column names.

        - ``fs_components_df["seas_names"]`` (e.g. ``daily``, ``weekly``) is appended
          to the column names, if provided.

    `~greykite.algo.forecast.silverkite.silverkite_diagnostics.SilverkiteDiagnostics.plot_silverkite_components` groups
    based on ``fs_components_df["seas_names"]`` passed to ``forecast_silverkite`` during fit.
    E.g. any column containing ``daily`` is added to daily seasonality effect. The reason
    is as follows:

        1. User can provide ``tow`` and ``str_dow`` for weekly seasonality.
           These should be aggregated, and we can do that only based on "seas_names".
        2. Yearly and quarterly seasonality both use ``ct1`` in the "name" column.
           The only way to distinguish those effects is via "seas_names".
        3. ``ct1`` is also used for growth. If it is interacted with seasonality, the columns become
           indistinguishable without "seas_names".

    Additionally, the function sets yaxis labels based on ``seas_names``:
    ``daily`` as ylabel is much more informative than ``tod`` as ylabel in component plots.
    """
    def __init__(self,
                 silverkite: SilverkiteForecast = SilverkiteForecast(),
                 silverkite_diagnostics:
                 Optional[SilverkiteDiagnostics] = None,
                 score_func: callable = mean_squared_error,
                 coverage: float = None,
                 null_model_params: Optional[Dict] = None,
                 uncertainty_dict: Optional[Dict] = None):
        """Stores the constructor arguments and clears all fitted state.

        Parameters
        ----------
        silverkite : `SilverkiteForecast`, optional
            Stored unchanged as ``self.silverkite``.
        silverkite_diagnostics : `SilverkiteDiagnostics` or None, default None
            Stored unchanged as ``self.silverkite_diagnostics``. When it is
            None, ``_set_silverkite_diagnostics_params`` (called by
            ``finish_fit``, and by ``summary`` / ``plot_components`` if still
            None) creates a new ``SilverkiteDiagnostics`` for this estimator.
        score_func : callable, default ``mean_squared_error``
            Forwarded to ``super().__init__``.
        coverage : `float` or None, default None
            Forwarded to ``super().__init__``.
        null_model_params : `dict` or None, default None
            Forwarded to ``super().__init__``.
        uncertainty_dict : `dict` or None, default None
            Stored unchanged as ``self.uncertainty_dict``.
        """
        # initializes null model
        super().__init__(score_func=score_func,
                         coverage=coverage,
                         null_model_params=null_model_params)

        # required in subclasses __init__
        self.uncertainty_dict = uncertainty_dict

        self.silverkite: SilverkiteForecast = silverkite
        # The default is None instead of `SilverkiteDiagnostics()`: a default
        # instance is built once, when the function is defined, and would be
        # shared by every estimator relying on the default, although
        # `set_params` configures it per fitted model. The value is stored
        # as passed (no conversion) so the constructor parameter round-trips.
        # NOTE(review): the `silverkite` default above is shared in the same
        # way; left unchanged because `predict` dereferences it
        # unconditionally and it cannot be confirmed from here whether that
        # object holds per-model state.
        self.silverkite_diagnostics: Optional[SilverkiteDiagnostics] = silverkite_diagnostics

        # set by `fit`
        # fitted model in dictionary format returned from
        # the `forecast_silverkite` function
        self.model_dict = None
        self.pred_cols = None
        self.feature_cols = None
        self.df = None
        self.coef_ = None
        # Predictor category, lazy initialization as None.
        # Will be updated in property function pred_category when needed.
        self._pred_category = None
        self.extra_pred_cols = None  # all silverkite estimators should support this.

        # set by the predict method
        self.forecast = None
        # set by the summary method
        self.model_summary = None

    def __set_uncertainty_dict(self, X, time_col, value_col):
        """Reconciles ``self.uncertainty_dict`` and ``self.coverage``.

        Replaces ``self.uncertainty_dict`` with the result of
        ``get_silverkite_uncertainty_dict`` and, when that result is not None,
        sets ``self.coverage`` to the distance between its last and first
        quantile.

        Parameters
        ----------
        X: `pandas.DataFrame`
            Input timeseries, with timestamp column,
            value column, and any additional regressors.
            Used here only to determine the smallest gap between timestamps.
        time_col: `str`
            Time column name in ``X``.
        value_col: `str`
            Value column name in ``X``. Not used by this method.

        Notes
        -----
        Intended to be called by `fit`.

        ``X`` is needed because the default ``uncertainty_dict`` is chosen
        from the data frequency when ``uncertainty_dict`` is None or "auto".
        (NB: ``X`` would not be necessary, and this function could be called
        from ``__init__``, if ``forecast_silverkite`` provided a default value
        for ``uncertainty_dict`` given the target coverage.)
        """
        smallest_gap = min_gap_in_seconds(df=X, time_col=time_col)
        frequency_name = get_simple_time_frequency_from_period(smallest_gap).name

        # Per the original author's notes, the helper fills in
        # `uncertainty_dict` when it is None / "auto" / missing quantiles so
        # that it matches `coverage`, and raises if both are given but
        # disagree.
        resolved_uncertainty = get_silverkite_uncertainty_dict(
            uncertainty=self.uncertainty_dict,
            simple_freq=frequency_name,
            coverage=self.coverage)
        self.uncertainty_dict = resolved_uncertainty

        if resolved_uncertainty is None:
            # No uncertainty model: leave `coverage` as it is.
            return

        # Coverage becomes the width of the widest quantile interval. When
        # coverage was already given the two are consistent, but it is set
        # anyway.
        quantile_bounds = resolved_uncertainty["params"]["quantiles"]
        self.coverage = quantile_bounds[-1] - quantile_bounds[0]

    def fit(self,
            X,
            y=None,
            time_col=cst.TIME_COL,
            value_col=cst.VALUE_COL,
            **fit_params):
        """Runs the shared pre-processing that precedes a ``Silverkite`` fit.

        Reconciles ``uncertainty_dict`` with ``coverage``, stores ``X`` as
        ``self.df``, then delegates to the parent ``fit``.

        Parameters
        ----------
        X: `pandas.DataFrame`
            Input timeseries, with timestamp column,
            value column, and any additional regressors.
            The value column is the response, included in
            ``X`` to allow transformation by `sklearn.pipeline`.
        y: ignored
            The original timeseries values, ignored.
            (The ``y`` for fitting is included in ``X``).
        time_col: `str`
            Time column name in ``X``.
        value_col: `str`
            Value column name in ``X``.
        fit_params: `dict`
            additional parameters for null model.

        Notes
        -----
        Subclasses are expected to call this at the beginning of their ``fit`` method,
        before calling `~greykite.algo.forecast.silverkite.forecast_silverkite.SilverkiteForecast.forecast`.
        This method does not return a value.
        """
        column_names = {"time_col": time_col, "value_col": value_col}

        # Coverage is reconciled before the parent fit so the null model
        # would see the final value (per the original author's note, the
        # null model does not currently use `coverage`, but may later).
        self.__set_uncertainty_dict(X=X, **column_names)
        self.df = X

        super().fit(X=X, y=y, **column_names, **fit_params)

    def finish_fit(self):
        """Makes important values of ``self.model_dict`` conveniently accessible.

        To be called by subclasses at the end of their ``fit`` method. Sets
        {``pred_cols``, ``feature_cols``, and ``coef_``}, clears the cached
        predictor categories, and configures ``silverkite_diagnostics``.

        Returns
        -------
        self
            The estimator itself.

        Raises
        ------
        ValueError
            If ``self.model_dict`` has not been set.
        """
        if self.model_dict is None:
            raise ValueError(
                "Must set `self.model_dict` before calling this function.")

        self.pred_cols = self.model_dict["pred_cols"]
        self.feature_cols = self.model_dict["x_mat"].columns

        # model coefficients
        ml_model = self.model_dict["ml_model"]
        if hasattr(ml_model, "coef_"):
            self.coef_ = pd.DataFrame(ml_model.coef_,
                                      index=self.feature_cols)
        else:
            # Reset explicitly: on a refit with a model that exposes no
            # `coef_`, the coefficients of the previous fit must not linger.
            self.coef_ = None

        # `pred_category` caches its result; drop the cache so the categories
        # are rebuilt from the current `model_dict` rather than from an
        # earlier fit.
        self._pred_category = None

        self._set_silverkite_diagnostics_params()
        return self

    def _set_silverkite_diagnostics_params(self):
        """Configures ``self.silverkite_diagnostics`` for the fitted model.

        Creates a ``SilverkiteDiagnostics`` first if none is set, then passes
        it the predictor categories and the time / value column names.
        """
        diagnostics = self.silverkite_diagnostics
        if diagnostics is None:
            diagnostics = SilverkiteDiagnostics()
            self.silverkite_diagnostics = diagnostics
        diagnostics.set_params(self.pred_category,
                               self.time_col_,
                               self.value_col_)

    def predict(self, X, y=None):
        """Forecasts the timestamps given in ``X``.

        Parameters
        ----------
        X: `pandas.DataFrame`
            Input timeseries with timestamp column and any additional regressors.
            Timestamps are the dates for prediction.
            Value column, if provided in ``X``, is ignored.
        y: ignored.

        Returns
        -------
        predictions: `pandas.DataFrame`
            Forecasted values for the dates in ``X``. Columns:

                - ``TIME_COL``: dates
                - ``PREDICTED_COL``: predictions
                - ``PREDICTED_LOWER_COL``: lower bound of predictions, optional
                - ``PREDICTED_UPPER_COL``: upper bound of predictions, optional
                - [other columns], optional

            The lower / upper bound columns are included when the silverkite
            output contains the quantile summary column (per the original
            documentation, when ``self.coverage`` is not None).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called, or the subclass ``fit`` did not
            call ``finish_fit``.
        """
        # The parent returns a cached result when one applies, else None.
        cached = super().predict(X=X)
        if cached is not None:
            return cached

        if self.model_dict is None:
            raise NotFittedError("Call `fit` before calling `predict`.")
        if self.pred_cols is None:
            raise NotFittedError(
                "Subclass must call `finish_fit` inside the `fit` method.")

        # Regressors are included in X, so no separate regressor frame is given.
        silverkite_result = self.silverkite.predict(
            fut_df=X,
            trained_model=self.model_dict,
            past_df=None,
            new_external_regressor_df=None)
        forecast_df = silverkite_result["fut_df"]
        self.forecast = forecast_df

        # Maps source column -> standardized output column. Its keys also
        # determine which columns are kept, and in what order.
        column_map = {
            self.time_col_: cst.TIME_COL,
            cst.VALUE_COL: cst.PREDICTED_COL,
        }

        # When uncertainty is returned, each entry of the summary column is a
        # tuple of quantile values; its first and last elements become the
        # lower and upper prediction limits. The tuple can hold more than two
        # elements if more than two ``quantiles`` were requested.
        summary_col = f"{cst.VALUE_COL}_quantile_summary"
        if summary_col in list(forecast_df.columns):
            forecast_df[cst.PREDICTED_LOWER_COL] = forecast_df[summary_col].apply(
                lambda quantile_values: quantile_values[0])
            forecast_df[cst.PREDICTED_UPPER_COL] = forecast_df[summary_col].apply(
                lambda quantile_values: quantile_values[-1])
            for kept_col in (cst.PREDICTED_LOWER_COL,
                             cst.PREDICTED_UPPER_COL,
                             summary_col):
                column_map[kept_col] = kept_col
            if cst.ERR_STD_COL in forecast_df.columns:
                column_map[cst.ERR_STD_COL] = cst.ERR_STD_COL

        selected = forecast_df[column_map.keys()]
        predictions = selected.rename(column_map, axis=1)

        # Caches the predictions
        self.cached_predictions_ = predictions
        return predictions

    @property
    def pred_category(self):
        """Predictor names grouped by category, as a dictionary.

        The value is computed on first access and cached afterwards, which
        keeps it out of the fitting path until it is needed. The categories
        are

            - "intercept" : the intercept.
            - "time_features" : the predictors that include
              `~greykite.common.constants.TIME_FEATURES`
              but not
              `~greykite.common.constants.SEASONALITY_REGEX`.
            - "event_features" : the predictors that include
              `~greykite.common.constants.EVENT_PREFIX`.
            - "trend_features" : the predictors that include
              `~greykite.common.constants.TREND_REGEX`
              but not
              `~greykite.common.constants.SEASONALITY_REGEX`.
            - "seasonality_features" : the predictors that include
              `~greykite.common.constants.SEASONALITY_REGEX`.
            - "lag_features" : the predictors that include
              `~greykite.common.constants.LAG_REGEX`.
            - "regressor_features" : external regressors and other predictors
              manually passed to ``extra_pred_cols``, but not in the categories above.
            - "interaction_features" : the predictors that include
              interaction terms, i.e., including a colon.

        Note that each predictor falls into at least one category.
        Some "time_features" may also be "trend_features".
        Predictors with an interaction are classified into all categories matched by
        the interaction components. Thus, "interaction_features" are already included
        in the other categories.

        Raises
        ------
        NotFittedError
            If ``self.model_dict`` is None.
        """
        if self.model_dict is None:
            raise NotFittedError("Must fit before getting predictor category.")

        if self._pred_category is not None:
            return self._pred_category

        # `extra_pred_cols` is either None or a list.
        user_pred_cols = self.extra_pred_cols
        if user_pred_cols is None:
            user_pred_cols = []

        # `regressor_cols` may be absent from the instance, None, or a list;
        # absent and None are both treated as "no regressors".
        user_regressor_cols = getattr(self, "regressor_cols", None)
        if user_regressor_cols is None:
            user_regressor_cols = []

        # Extra regressors are specified via "regressor_cols" in
        # simple_silverkite_estimator, so both lists are passed together.
        self._pred_category = create_pred_category(
            pred_cols=self.model_dict["x_mat"].columns,
            extra_pred_cols=user_pred_cols + user_regressor_cols)
        return self._pred_category

    def summary(self, max_colwidth=20):
        """Returns the summary produced by the diagnostics helper for the model.

        Parameters
        ----------
        max_colwidth : `int`, default 20
            Forwarded unchanged to ``self.silverkite_diagnostics.summary``.

        Returns
        -------
        summary
            The value returned by ``self.silverkite_diagnostics.summary``
            for ``self.model_dict``.
        """
        # Set up the diagnostics helper first if none is attached yet.
        if self.silverkite_diagnostics is None:
            self._set_silverkite_diagnostics_params()
        diagnostics = self.silverkite_diagnostics
        return diagnostics.summary(self.model_dict, max_colwidth)

    def plot_components(self, names=None, title=None):
        """Plots model components through the diagnostics helper.

        Parameters
        ----------
        names : optional, default `None`
            Component names, forwarded unchanged to
            ``self.silverkite_diagnostics.plot_components``.
        title : optional, default `None`
            Plot title, forwarded unchanged.

        Returns
        -------
        fig
            The value returned by ``self.silverkite_diagnostics.plot_components``.

        Raises
        ------
        NotFittedError
            If ``self.model_dict`` is `None`, i.e. ``fit`` has not been called.
        """
        not_fitted = self.model_dict is None
        if not_fitted:
            raise NotFittedError(
                "Call `fit` before calling `plot_components`.")
        # Set up the diagnostics helper first if none is attached yet.
        if self.silverkite_diagnostics is None:
            self._set_silverkite_diagnostics_params()
        diagnostics = self.silverkite_diagnostics
        return diagnostics.plot_components(self.model_dict, names, title)

    def plot_trend(self, title=None):
        """Shortcut for plotting the data together with the trend component.

        Delegates to ``plot_components`` with ``names=["trend"]``.

        Parameters
        ----------
        title: `str`, optional, default `None`
            Plot title. "Trend plot" is used when `None`.

        Returns
        -------
        fig: `plotly.graph_objs.Figure`
            Figure.
        """
        plot_title = "Trend plot" if title is None else title
        return self.plot_components(names=["trend"], title=plot_title)

    def plot_seasonalities(self, title=None):
        """Shortcut for plotting the data together with the seasonality components.

        Delegates to ``plot_components`` with the daily, weekly, monthly,
        quarterly and yearly seasonality component names.

        Parameters
        ----------
        title: `str`, optional, default `None`
            Plot title. "Seasonality plot" is used when `None`.

        Returns
        -------
        fig: `plotly.graph_objs.Figure`
            Figure.
        """
        plot_title = "Seasonality plot" if title is None else title
        seasonality_components = [
            "DAILY_SEASONALITY",
            "WEEKLY_SEASONALITY",
            "MONTHLY_SEASONALITY",
            "QUARTERLY_SEASONALITY",
            "YEARLY_SEASONALITY",
        ]
        return self.plot_components(names=seasonality_components,
                                    title=plot_title)

    def plot_trend_changepoint_detection(self, params=None):
        """Convenience function to plot the original trend changepoint detection results.

        Parameters
        ----------
        params : `dict` or `None`, default `None`
            The parameters in `~greykite.algo.changepoint.adalasso.changepoint_detector.ChangepointDetector.plot`.
            If set to `None`, all components will be plotted.

            Note: seasonality components plotting is not supported currently,
            so ``seasonality_change`` and ``seasonality_estimate`` are always
            forced to False, and ``plot`` is forced to False so that the figure
            is returned. The provided dictionary is not modified.

        Returns
        -------
        fig : `plotly.graph_objs.Figure`
            Figure.
        """
        if params is None:
            plot_params = dict(
                observation=True,
                observation_original=True,
                trend_estimate=True,
                trend_change=True,
                yearly_seasonality_estimate=True,
                adaptive_lasso_estimate=True,
                seasonality_change=False,  # currently for trend only
                seasonality_change_by_component=False,
                seasonality_estimate=False,
                plot=False)
        else:
            # Work on a copy: the overrides below must not leak into the
            # dictionary owned by the caller.
            plot_params = dict(params)
            # currently for trend only
            plot_params["seasonality_change"] = False
            plot_params["seasonality_estimate"] = False
            # need to return the figure object
            plot_params["plot"] = False
        return self.model_dict["changepoint_detector"].plot(**plot_params)
Example #7
0
 def _set_silverkite_diagnostics_params(self):
     """Passes the predictor categories and column names to the diagnostics helper.

     A new ``SilverkiteDiagnostics`` is created and attached to the instance
     when ``self.silverkite_diagnostics`` is `None`. The helper's
     ``set_params`` is then called with ``self.pred_category``,
     ``self.time_col_`` and ``self.value_col_``.
     """
     diagnostics = self.silverkite_diagnostics
     if diagnostics is None:
         diagnostics = SilverkiteDiagnostics()
         self.silverkite_diagnostics = diagnostics
     diagnostics.set_params(self.pred_category, self.time_col_,
                            self.value_col_)
def test_plot_silverkite_components():
    """Checks the figures produced by plot_silverkite_components."""
    diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics()
    n_rows = 5

    # Data with trend, seasonality and events.
    time_col = "ts"
    # The value column name contains the keywords "ct" and "sin" on purpose,
    # so that the patterns specified for each component are exercised against it.
    value_col = "basin_impact"
    df = pd.DataFrame({
        time_col: [datetime.datetime(2018, 1, day) for day in range(1, n_rows + 1)],
        value_col: [1, 2, 3, 4, 5],
    })

    feature_columns = {}
    # Trend columns: growth, changepoints and interactions (5 columns, all 1.0)
    for trend_name in ("ct1",
                       "ct1:tod",
                       "ct_sqrt",
                       "changepoint0_2018_01_02_00",
                       "changepoint1_2018_01_04_00"):
        feature_columns[trend_name] = np.full(n_rows, 1.0)
    # Weekly seasonality with interaction (4 columns, all 2.0)
    for weekly_name in ("sin1_tow_weekly",
                        "cos1_tow_weekly",
                        "is_weekend[T.True]:sin1_tow_weekly",
                        "is_weekend[T.True]:cos1_tow_weekly"):
        feature_columns[weekly_name] = np.full(n_rows, 2.0)
    # Yearly seasonality (6 columns, all 3.0)
    for yearly_name in ("sin1_ct1_yearly",
                        "cos1_ct1_yearly",
                        "sin2_ct1_yearly",
                        "cos2_ct1_yearly",
                        "sin3_ct1_yearly",
                        "cos3_ct1_yearly"):
        feature_columns[yearly_name] = np.full(n_rows, 3.0)
    # Holiday with pre and post effects: 1 where the date matches the event,
    # e.g. New Years Day is 1 on January 1st and 0 on all other days.
    event_indicators = {
        "Q('events_New Years Day')[T.event]": [1.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_minus_1')[T.event]": [0.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_minus_2')[T.event]": [0.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_plus_1')[T.event]": [0.0, 1.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_plus_2')[T.event]": [0.0, 0.0, 1.0, 0.0, 0.0],
    }
    for event_name, indicator in event_indicators.items():
        feature_columns[event_name] = np.array(indicator)
    feature_df = pd.DataFrame(feature_columns)

    components = diagnostics.get_silverkite_components(
        df, time_col, value_col, feature_df)

    # Default call: every component is plotted.
    fig = diagnostics.plot_silverkite_components(components)
    assert len(fig.data) == 5 + 2  # 2 changepoints
    trace_names = [trace.name for trace in fig.data]
    assert trace_names == list(
        components.columns)[1:-1] + ["trend change point"] * 2

    # changepoints do not create separate subplots
    assert fig.layout.height == (len(fig.data) - 2) * 350
    assert fig.layout.showlegend is True  # legend for changepoints
    assert fig.layout.title["text"] == "Component plots"

    layout = fig.layout
    x_titles = [
        axis.title["text"] for axis in (
            layout.xaxis, layout.xaxis2, layout.xaxis3, layout.xaxis4,
            layout.xaxis5)
    ]
    assert x_titles == [
        time_col, time_col, "Day of week", "Time of year", time_col
    ]
    y_titles = [
        axis.title["text"] for axis in (
            layout.yaxis, layout.yaxis2, layout.yaxis3, layout.yaxis4,
            layout.yaxis5)
    ]
    assert y_titles == [value_col, "trend", "weekly", "yearly", "events"]

    # Explicit component list containing an unknown name: a warning is expected.
    with pytest.warns(Warning) as record:
        names = ["YEARLY_SEASONALITY", value_col, "DUMMY"]
        title = "Component plot without trend and weekly seasonality"
        fig = diagnostics.plot_silverkite_components(
            components, names=names, title=title)

        expected_length = 2
        assert len(fig.data) == expected_length
        assert [trace.name for trace in fig.data
                ] == [value_col, "YEARLY_SEASONALITY"]

        assert fig.layout.height == expected_length * 350
        assert fig.layout.showlegend is True
        assert fig.layout.title["text"] == title

        assert fig.layout.xaxis.title["text"] == time_col
        assert fig.layout.xaxis2.title["text"] == "Time of year"

        assert fig.layout.yaxis.title["text"] == value_col
        assert fig.layout.yaxis2.title["text"] == "yearly"
        assert f"The following components have not been specified in the model: " \
               f"{{'DUMMY'}}, plotting the rest." in record[0].message.args[0]

    # Only unknown component names: an exception is expected.
    with pytest.raises(
            ValueError,
            match=
            "None of the provided components have been specified in the model."
    ):
        names = ["DUMMY"]
        diagnostics.plot_silverkite_components(components, names=names)
def test_get_silverkite_components():
    """Checks the component dataframe built by get_silverkite_components."""
    diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics()
    n_rows = 5

    # Data with trend, seasonality and events.
    time_col = "ts"
    # The value column name contains the keywords "ct" and "sin" on purpose,
    # so that the patterns specified for each component are exercised against it.
    value_col = "basin_impact"
    df = pd.DataFrame({
        time_col: [datetime.datetime(2018, 1, day) for day in range(1, n_rows + 1)],
        value_col: [1, 2, 3, 4, 5],
        "dummy_col": [0, 0, 0, 0, 0],
    })

    feature_columns = {}
    # Trend columns: growth, changepoints and interactions (5 columns, all 1.0)
    for trend_name in ("ct1",
                       "ct1:tod",
                       "ct_sqrt",
                       "changepoint0_2018_01_02_00",
                       "changepoint1_2018_01_04_00"):
        feature_columns[trend_name] = np.full(n_rows, 1.0)
    # Weekly seasonality with interaction (4 columns, all 2.0)
    for weekly_name in ("sin1_tow_weekly",
                        "cos1_tow_weekly",
                        "is_weekend[T.True]:sin1_tow_weekly",
                        "is_weekend[T.True]:cos1_tow_weekly"):
        feature_columns[weekly_name] = np.full(n_rows, 2.0)
    # Yearly seasonality (6 columns, all 3.0)
    for yearly_name in ("sin1_ct1_yearly",
                        "cos1_ct1_yearly",
                        "sin2_ct1_yearly",
                        "cos2_ct1_yearly",
                        "sin3_ct1_yearly",
                        "cos3_ct1_yearly"):
        feature_columns[yearly_name] = np.full(n_rows, 3.0)
    # Holiday with pre and post effects: 1 where the date matches the event,
    # e.g. New Years Day is 1 on January 1st and 0 on all other days.
    event_indicators = {
        "Q('events_New Years Day')[T.event]": [1.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_minus_1')[T.event]": [0.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_minus_2')[T.event]": [0.0, 0.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_plus_1')[T.event]": [0.0, 1.0, 0.0, 0.0, 0.0],
        "Q('events_New Years Day_plus_2')[T.event]": [0.0, 0.0, 1.0, 0.0, 0.0],
    }
    for event_name, indicator in event_indicators.items():
        feature_columns[event_name] = np.array(indicator)
    feature_df = pd.DataFrame(feature_columns)

    components = diagnostics.get_silverkite_components(
        df, time_col, value_col, feature_df)

    # Each expected component is (number of columns in the group) * (column value).
    expected_columns = {
        time_col: df[time_col],
        value_col: df[value_col],
        "trend": 5 * np.full(n_rows, 1.0),
        "WEEKLY_SEASONALITY": 4 * np.full(n_rows, 2.0),
        "YEARLY_SEASONALITY": 6 * np.full(n_rows, 3.0),
        cst.EVENT_PREFIX: np.array([1.0, 1.0, 1.0, 0.0, 0.0]),
        "trend_changepoints": np.array([0, 1, 0, 1, 0]),
    }
    expected_df = pd.DataFrame(expected_columns)
    assert_frame_equal(components, expected_df)

    # Error messages
    empty_features = pd.DataFrame()
    with pytest.raises(ValueError, match="feature_df must be non-empty"):
        diagnostics.get_silverkite_components(
            df, time_col, value_col, feature_df=empty_features)

    with pytest.raises(
            ValueError,
            match="df and feature_df must have same number of rows."):
        diagnostics.get_silverkite_components(
            df,
            time_col,
            value_col,
            feature_df=pd.DataFrame({"ts": [1, 2, 3]}))
def test_group_silverkite_seas_components():
    """Checks group_silverkite_seas_components for each seasonality column."""
    diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics()
    time_col = "ts"

    def grouped(start, end, freq, seas_col, time_feature):
        """Groups a frame whose ``seas_col`` holds ``time_feature`` over [start, end]."""
        dates = pd.date_range(start=start, end=end, freq=freq).tolist()
        time_df = build_time_features_df(dates, conti_year_origin=2018)
        seas_df = pd.DataFrame({
            time_col: time_df["datetime"],
            seas_col: time_df[time_feature]
        })
        return diagnostics.group_silverkite_seas_components(seas_df)

    # Daily
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-07", "H", "DAILY_SEASONALITY", "hour"),
        pd.DataFrame({
            "Hour of day": np.arange(24.0),
            "daily": np.arange(24.0),
        }))

    # Weekly
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-20", "D", "WEEKLY_SEASONALITY", "tow"),
        pd.DataFrame({
            "Day of week": np.arange(7.0),
            "weekly": np.arange(7.0),
        }))

    # Monthly
    assert_frame_equal(
        grouped("2018-01-01", "2018-01-31", "D", "MONTHLY_SEASONALITY", "dom"),
        pd.DataFrame({
            "Time of month": np.arange(31.0) / 31,
            "monthly": np.arange(1.0, 32.0),
        }))

    # Quarterly (92 day quarters)
    assert_frame_equal(
        grouped("2018-07-01", "2018-12-31", "D", "QUARTERLY_SEASONALITY",
                "toq"),
        pd.DataFrame({
            "Time of quarter": np.arange(92.0) / 92,
            "quarterly": np.arange(92.0) / 92,
        }))

    # Quarterly (90 day quarter)
    assert_frame_equal(
        grouped("2018-01-01", "2018-03-31", "D", "QUARTERLY_SEASONALITY",
                "toq"),
        pd.DataFrame({
            "Time of quarter": np.arange(90.0) / 90,
            "quarterly": np.arange(90.0) / 90,
        }))

    # Yearly (non-leap years)
    assert_frame_equal(
        grouped("2018-01-01", "2019-12-31", "D", "YEARLY_SEASONALITY", "toy"),
        pd.DataFrame({
            "Time of year": np.arange(365.0) / 365,
            "yearly": np.arange(365.0) / 365,
        }))
Example #11
0
def apply_default_model_components(model_components=None,
                                   time_properties=None):
    """Fills in default values for ``model_components``.

    Each attribute of ``model_components`` (seasonality, growth, events,
    changepoints, autoregression, regressors, lagged_regressors, uncertainty,
    custom) is combined with a dictionary of defaults via ``update_dictionary``,
    with the provided values passed as ``overwrite_dict``.

    Parameters
    ----------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None, default None
        Configuration of model growth, seasonality, events, etc.
        See :func:`~greykite.framework.templates.silverkite_templates.silverkite_template` for details.
        The input object is copied, not modified.
    time_properties : `dict` [`str`, `any`] or None, default None
        Time properties dictionary (likely produced by
        `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
        with keys:

        ``"period"`` : `int`
            Period of each observation (i.e. minimum time between observations, in seconds).
        ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
            ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
        ``"num_training_points"`` : `int`
            Number of observations for training.
        ``"num_training_days"`` : `int`
            Number of days for training.
        ``"start_year"`` : `int`
            Start year of the training period.
        ``"end_year"`` : `int`
            End year of the forecast period.
        ``"origin_for_time_vars"`` : `float`
            Continuous time representation of the first date in ``df``.

        Only ``"origin_for_time_vars"`` is read by this function.

    Returns
    -------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam`
        The provided ``model_components`` with default values set
    """
    # A fresh object when nothing is given; otherwise a copy so that the
    # caller's instance is not mutated.
    model_components = (
        ModelComponentsParam()
        if model_components is None
        else dataclasses.replace(model_components))

    def _fill_defaults(attribute, defaults):
        """Replaces ``attribute`` by ``defaults`` updated with its current value."""
        merged = update_dictionary(
            defaults,
            overwrite_dict=getattr(model_components, attribute),
            allow_unknown_keys=False)
        setattr(model_components, attribute, merged)

    _fill_defaults("seasonality", {
        "fs_components_df": [
            pd.DataFrame({
                "name": ["tod", "tow", "tom", "toq", "toy"],
                "period": [24.0, 7.0, 1.0, 1.0, 1.0],
                "order": [3, 3, 1, 1, 5],
                "seas_names":
                ["daily", "weekly", "monthly", "quarterly", "yearly"]
            })
        ],
    })

    # `model_components.growth` must be empty:
    # growth terms are passed via `extra_pred_cols` instead.
    _fill_defaults("growth", {})

    _fill_defaults("events", {
        "daily_event_df_dict": [None],
    })

    # "changepoint_detector" is intentionally not an allowed key, to prevent
    # leaking future information into the past. Pass `changepoints_dict` with
    # method="auto" for automatic detection.
    _fill_defaults("changepoints", {
        "changepoints_dict": [None],
        "seasonality_changepoints_dict": [None],
    })

    _fill_defaults("autoregression", {
        "autoreg_dict": [None],
    })

    _fill_defaults("regressors", {})

    _fill_defaults("lagged_regressors", {
        "lagged_regressor_dict": [None],
    })

    _fill_defaults("uncertainty", {
        "uncertainty_dict": [None],
    })

    origin_for_time_vars = (
        None
        if time_properties is None
        else time_properties.get("origin_for_time_vars"))

    _fill_defaults("custom", {
        "silverkite":
        [SilverkiteForecast()],  # NB: sklearn creates a copy in grid search
        "silverkite_diagnostics": [SilverkiteDiagnostics()],
        # The same origin for every split, based on start year of full dataset.
        # To use first date of each training split, set to `None` in model_components.
        "origin_for_time_vars": [origin_for_time_vars],
        "extra_pred_cols": ["ct1"],  # linear growth
        "fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "min_admissible_value": [None],
        "max_admissible_value": [None],
    })

    # Becomes {} if None; applied to each item when
    # `model_components.hyperparameter_override` is a list of dictionaries.
    model_components.hyperparameter_override = update_dictionaries(
        {}, overwrite_dicts=model_components.hyperparameter_override)

    return model_components
Example #12
0
    def __init__(
            self,
            silverkite: SilverkiteForecast = SilverkiteForecast(),
            silverkite_diagnostics: SilverkiteDiagnostics = SilverkiteDiagnostics(),
            score_func=mean_squared_error,
            coverage=None,
            null_model_params=None,
            origin_for_time_vars=None,
            extra_pred_cols=None,
            train_test_thresh=None,
            training_fraction=None,
            fit_algorithm_dict=None,
            daily_event_df_dict=None,
            fs_components_df=pd.DataFrame({
                "name": ["tod", "tow", "conti_year"],
                "period": [24.0, 7.0, 1.0],
                "order": [3, 3, 5],
                "seas_names": ["daily", "weekly", "yearly"]}),
            autoreg_dict=None,
            lagged_regressor_dict=None,
            changepoints_dict=None,
            seasonality_changepoints_dict=None,
            changepoint_detector=None,
            min_admissible_value=None,
            max_admissible_value=None,
            uncertainty_dict=None,
            normalize_method=None,
            adjust_anomalous_dict=None,
            impute_dict=None,
            regression_weight_col=None,
            forecast_horizon=None,
            simulation_based=False):
        """Stores the constructor parameters on the estimator and validates them.

        ``silverkite``, ``silverkite_diagnostics``, ``score_func``, ``coverage``,
        ``null_model_params`` and ``uncertainty_dict`` are forwarded to the
        parent ``__init__``. Every other parameter is stored unchanged as an
        attribute of the same name, and ``validate_inputs`` is called last.

        NOTE(review): the default ``silverkite``, ``silverkite_diagnostics`` and
        ``fs_components_df`` objects are created once, when the function is
        defined, so instances built with the defaults share the same objects.
        Confirm that nothing mutates them in place.
        """
        # every subclass of BaseSilverkiteEstimator must call super().__init__
        super().__init__(
            silverkite=silverkite,
            silverkite_diagnostics=silverkite_diagnostics,
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params,
            uncertainty_dict=uncertainty_dict)

        # necessary to set parameters, to ensure get_params() works
        # (used in grid search)
        # Each parameter is assigned as-is, without copying or transforming it.
        self.score_func = score_func
        self.coverage = coverage
        self.null_model_params = null_model_params
        self.origin_for_time_vars = origin_for_time_vars
        self.extra_pred_cols = extra_pred_cols
        self.train_test_thresh = train_test_thresh
        self.fit_algorithm_dict = fit_algorithm_dict
        self.training_fraction = training_fraction
        self.daily_event_df_dict = daily_event_df_dict
        self.fs_components_df = fs_components_df
        self.autoreg_dict = autoreg_dict
        self.lagged_regressor_dict = lagged_regressor_dict
        self.changepoints_dict = changepoints_dict
        self.seasonality_changepoints_dict = seasonality_changepoints_dict
        self.changepoint_detector = changepoint_detector
        self.min_admissible_value = min_admissible_value
        self.max_admissible_value = max_admissible_value
        self.uncertainty_dict = uncertainty_dict
        self.normalize_method = normalize_method
        self.adjust_anomalous_dict = adjust_anomalous_dict
        self.impute_dict = impute_dict
        self.regression_weight_col = regression_weight_col
        self.forecast_horizon = forecast_horizon
        self.simulation_based = simulation_based
        # Runs after all attributes are set.
        self.validate_inputs()