Ejemplo n.º 1
0
    def apply_prophet_model_components_defaults(self,
                                                model_components=None,
                                                time_properties=None):
        """Sets default values for ``model_components``.

        Called by ``get_hyperparameter_grid`` after ``time_properties`` is defined.
        Requires ``time_properties`` as well as ``model_components``
        so we do not simply override
        `~greykite.framework.templates.forecast_config_defaults.ForecastConfigDefaults.apply_model_components_defaults`.

        Each section (seasonality, growth, events, changepoints, uncertainty,
        regressors) is merged with its defaults via ``update_dictionary``;
        user-provided values take precedence. The ``events`` section is then
        replaced by a dictionary with exactly two keys, ``"holidays_df"``
        (the result of ``self.get_prophet_holidays``) and
        ``"holidays_prior_scale"``.

        Parameters
        ----------
        model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None, default None
            Configuration of model growth, seasonality, events, etc.
            See the docstring of this class for details.
            A copy is made; the attributes of the provided object are not reassigned.
        time_properties : `dict` [`str`, `any`] or None, default None
            Time properties dictionary (likely produced by
            `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
            with keys:

                ``"period"`` : `int`
                    Period of each observation (i.e. minimum time between observations, in seconds).
                ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
                    ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
                ``"num_training_points"`` : `int`
                    Number of observations for training.
                ``"num_training_days"`` : `int`
                    Number of days for training.
                ``"start_year"`` : `int`
                    Start year of the training period.
                ``"end_year"`` : `int`
                    End year of the forecast period.
                ``"origin_for_time_vars"`` : `float`
                    Continuous time representation of the first date in ``df``.

            Only ``"start_year"`` and ``"end_year"`` are read by this method.
            If ``time_properties`` is None, or one of these two keys is absent,
            start_year defaults to 2015 and end_year to 2030.

        Returns
        -------
        model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam`
            The provided ``model_components`` with default values set

        Warns
        -----
        UserWarning
            If ``events["holiday_pre_num_days"]``, ``events["holiday_post_num_days"]``
            or ``events["holiday_lookup_countries"]`` contains more than one option.
            Only the first option is used.
        """
        if model_components is None:
            model_components = ModelComponentsParam()
        else:
            # makes a copy to avoid mutating input
            model_components = dataclasses.replace(model_components)
        if time_properties is None:
            time_properties = {}

        # seasonality
        default_seasonality = {
            "seasonality_mode": ["additive"],
            "seasonality_prior_scale": [10.0],
            "yearly_seasonality": ['auto'],
            "weekly_seasonality": ['auto'],
            "daily_seasonality": ['auto'],
            "add_seasonality_dict": [None]
        }
        # If seasonality params are not provided, uses default params. Otherwise, prefers provided params.
        # `allow_unknown_keys=False` requires `model_components.seasonality` keys to be a subset of
        # `default_seasonality` keys.
        model_components.seasonality = update_dictionary(
            default_dict=default_seasonality,
            overwrite_dict=model_components.seasonality,
            allow_unknown_keys=False)

        # growth
        default_growth = {"growth_term": ["linear"]}
        model_components.growth = update_dictionary(
            default_dict=default_growth,
            overwrite_dict=model_components.growth,
            allow_unknown_keys=False)

        # events
        default_events = {
            "holiday_lookup_countries": "auto",  # see `get_prophet_holidays` for defaults
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [2],
            # `.get` with a fallback so that a partial `time_properties` dictionary
            # does not raise `KeyError`, in particular when the years are supplied
            # through `model_components.events` and these defaults are never used.
            "start_year": time_properties.get("start_year", 2015),
            "end_year": time_properties.get("end_year", 2030),
            "holidays_prior_scale": [10.0]
        }
        events = update_dictionary(
            default_dict=default_events,
            overwrite_dict=model_components.events,
            allow_unknown_keys=False)

        # Creates events dictionary for prophet estimator
        # Expands the range of holiday years by 1 year on each end, to ensure we have coverage of most relevant holidays.
        year_list = list(range(events["start_year"] - 1, events["end_year"] + 2))
        # Currently we support only one set of holiday_lookup_countries, holiday_pre_num_days and holiday_post_num_days.
        # Shows a warning if user supplies >1 set.
        if len(events["holiday_pre_num_days"]) > 1:
            warnings.warn(
                f"`events['holiday_pre_num_days']` list has more than 1 element. We currently support only 1 element. "
                f"Using {events['holiday_pre_num_days'][0]}.")
        if len(events["holiday_post_num_days"]) > 1:
            warnings.warn(
                f"`events['holiday_post_num_days']` list has more than 1 element. We currently support only 1 element. "
                f"Using {events['holiday_post_num_days'][0]}."
            )

        # If events["holiday_lookup_countries"] has multiple options, picks the first option
        countries = events["holiday_lookup_countries"]
        if countries is not None and countries != "auto":
            if len(countries) > 1:
                # There are multiple elements. A nested list/tuple, None or "auto"
                # among them means this is a list of options, not a flat list of
                # country names. Tuples are checked as well as lists, consistent
                # with the single-element case below.
                if (any(isinstance(x, (list, tuple)) for x in countries)
                        or None in countries
                        or "auto" in countries):
                    # Not a flat list of country names
                    warnings.warn(
                        f"`events['holiday_lookup_countries']` contains multiple options. "
                        f"We currently support only 1 option. Using {countries[0]}."
                    )
                    countries = countries[0]
            elif len(countries) == 1 and isinstance(countries[0], (list, tuple)):
                # There's only one element, and it's a list of countries
                countries = countries[0]
            # An empty list is passed through unchanged instead of failing on `[0]`.
            # NOTE(review): confirm how `get_prophet_holidays` treats an empty list.

        model_components.events = {
            "holidays_df": self.get_prophet_holidays(
                year_list=year_list,
                countries=countries,
                # holiday effect is modeled from "holiday_pre_num_days" days before
                # to "holiday_post_num_days" days after the holiday
                # Prophet expects a negative value for `lower_window`
                lower_window=-events["holiday_pre_num_days"][0],
                upper_window=events["holiday_post_num_days"][0]),
            "holidays_prior_scale": events["holidays_prior_scale"]
        }

        # changepoints_dict
        default_changepoints = {
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [25],
            "changepoint_range": [0.8]
        }
        model_components.changepoints = update_dictionary(
            default_dict=default_changepoints,
            overwrite_dict=model_components.changepoints,
            allow_unknown_keys=False)

        # uncertainty
        default_uncertainty = {
            "mcmc_samples": [0],
            "uncertainty_samples": [1000]
        }
        model_components.uncertainty = update_dictionary(
            default_dict=default_uncertainty,
            overwrite_dict=model_components.uncertainty,
            allow_unknown_keys=False)

        # regressors
        default_regressors = {"add_regressor_dict": [None]}
        model_components.regressors = update_dictionary(
            default_dict=default_regressors,
            overwrite_dict=model_components.regressors,
            allow_unknown_keys=False)

        # there are no custom parameters for Prophet

        # sets to {} if None, for each item if
        # `model_components.hyperparameter_override` is a list of dictionaries
        model_components.hyperparameter_override = update_dictionaries(
            {}, overwrite_dicts=model_components.hyperparameter_override)

        return model_components
Ejemplo n.º 2
0
def apply_default_model_components(model_components=None,
                                   time_properties=None):
    """Fills in default values for every section of ``model_components``.

    Each section is merged with its defaults through ``update_dictionary``
    with ``allow_unknown_keys=False``; values already present in
    ``model_components`` take precedence over the defaults.

    Parameters
    ----------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam` or None, default None
        Configuration of model growth, seasonality, events, etc.
        See :func:`~greykite.framework.templates.silverkite_templates.silverkite_template` for details.
        If None, a new ``ModelComponentsParam()`` is used. Otherwise a copy
        is made so the attributes of the provided object are not reassigned.
    time_properties : `dict` [`str`, `any`] or None, default None
        Time properties dictionary (likely produced by
        `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
        with keys:

        ``"period"`` : `int`
            Period of each observation (i.e. minimum time between observations, in seconds).
        ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
            ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
        ``"num_training_points"`` : `int`
            Number of observations for training.
        ``"num_training_days"`` : `int`
            Number of days for training.
        ``"start_year"`` : `int`
            Start year of the training period.
        ``"end_year"`` : `int`
            End year of the forecast period.
        ``"origin_for_time_vars"`` : `float`
            Continuous time representation of the first date in ``df``.

        Only ``"origin_for_time_vars"`` is read here; it becomes the default
        for ``custom["origin_for_time_vars"]`` (None when ``time_properties``
        is None or lacks the key).

    Returns
    -------
    model_components : :class:`~greykite.framework.templates.autogen.forecast_config.ModelComponentsParam`
        The provided ``model_components`` with default values set
    """
    # Never reassign attributes on the caller's object: work on a copy.
    components = (
        ModelComponentsParam()
        if model_components is None
        else dataclasses.replace(model_components))

    # Default Fourier series specification, one row per seasonality.
    fourier_series_df = pd.DataFrame({
        "name": ["tod", "tow", "tom", "toq", "toy"],
        "period": [24.0, 7.0, 1.0, 1.0, 1.0],
        "order": [3, 3, 1, 1, 5],
        "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"],
    })

    # (attribute name, default values) pairs, merged in the order listed.
    section_defaults = (
        ("seasonality", {"fs_components_df": [fourier_series_df]}),
        # `growth` must be empty.
        # Pass growth terms via `extra_pred_cols` instead.
        ("growth", {}),
        ("events", {"daily_event_df_dict": [None]}),
        # "changepoint_detector" is intentionally not an allowed key here,
        # to prevent leaking future information into the past.
        # Pass `changepoints_dict` with method="auto" for automatic detection.
        ("changepoints", {
            "changepoints_dict": [None],
            "seasonality_changepoints_dict": [None],
        }),
        ("autoregression", {"autoreg_dict": [None]}),
        ("regressors", {}),
        ("lagged_regressors", {"lagged_regressor_dict": [None]}),
        ("uncertainty", {"uncertainty_dict": [None]}),
    )
    for section_name, defaults in section_defaults:
        merged = update_dictionary(
            defaults,
            overwrite_dict=getattr(components, section_name),
            allow_unknown_keys=False)
        setattr(components, section_name, merged)

    # The same origin for every split, taken from `time_properties` when available.
    # To use first date of each training split, set to `None` in model_components.
    shared_origin = (
        None
        if time_properties is None
        else time_properties.get("origin_for_time_vars"))

    components.custom = update_dictionary(
        {
            # NB: sklearn creates a copy in grid search
            "silverkite": [SilverkiteForecast()],
            "silverkite_diagnostics": [SilverkiteDiagnostics()],
            "origin_for_time_vars": [shared_origin],
            # linear growth
            "extra_pred_cols": ["ct1"],
            "fit_algorithm_dict": [{
                "fit_algorithm": "linear",
                "fit_algorithm_params": None,
            }],
            "min_admissible_value": [None],
            "max_admissible_value": [None],
        },
        overwrite_dict=components.custom,
        allow_unknown_keys=False)

    # sets to {} if None, for each item if
    # `hyperparameter_override` is a list of dictionaries
    components.hyperparameter_override = update_dictionaries(
        {}, overwrite_dicts=components.hyperparameter_override)

    return components