Beispiel #1
0
    "trend",
    "low_pass",
    "seasonal_deg",
    "trend_deg",
    "low_pass_deg",
    "robust",
    "seasonal_jump",
    "trend_jump",
    "low_pass_jump",
])

# Parse the docstring of ``STL.fit`` and pull out the entries for
# ``inner_iter`` and ``outer_iter``.
# NOTE(review): presumably the extracted text is meant to be substituted into
# another docstring through a ``fit_params`` placeholder -- confirm against
# the ``Substitution`` decorator usage.
ds = Docstring(STL.fit.__doc__)
_fit_params = ds.extract_parameters(["inner_iter", "outer_iter"])


@Substitution(stl_forecast_params=indent(_stl_forecast_params, "    "))
class STLForecast:
    r"""
    Model-based forecasting using STL to remove seasonality

    Forecasts are produced by first subtracting the seasonality
    estimated using STL, then forecasting the deseasonalized
    data using a time-series model, for example, ARIMA.

    Parameters
    ----------
%(stl_forecast_params)s

    See Also
    --------
    statsmodels.tsa.arima.model.ARIMA
Beispiel #2
0
class STLForecast:
    r"""
    Model-based forecasting using STL to remove seasonality

    Forecasts are produced by first subtracting the seasonality
    estimated using STL, then forecasting the deseasonalized
    data using a time-series model, for example, ARIMA.

    Parameters
    ----------
%(stl_forecast_params)s

    See Also
    --------
    statsmodels.tsa.arima.model.ARIMA
        ARIMA modeling.
    statsmodels.tsa.ar_model.AutoReg
        Autoregressive modeling supporting complex deterministics.
    statsmodels.tsa.exponential_smoothing.ets.ETSModel
        Additive and multiplicative exponential smoothing with trend.
    statsmodels.tsa.statespace.exponential_smoothing.ExponentialSmoothing
        Additive exponential smoothing with trend.

    Notes
    -----
    If :math:`\hat{S}_t` is the seasonal component, then the deseasonalized
    series is constructed as

    .. math::

        Y_t - \hat{S}_t

    The trend component is not removed, and so the time series model should
    be capable of adequately fitting and forecasting the trend if present. The
    out-of-sample forecasts of the seasonal component are produced as

    .. math::

        \hat{S}_{T + h} = \hat{S}_{T - k}

    where :math:`k = m - h + m \lfloor (h-1)/m \rfloor` tracks the period
    offset in the full cycle of 1, 2, ..., m where m is the period length.

    This class is mostly a convenience wrapper around ``STL`` and a
    user-specified model. The model is assumed to follow the standard
    statsmodels pattern:

    * ``fit`` is used to estimate parameters and returns a results instance,
      ``results``.
    * ``results`` must expose a method ``forecast(steps, **kwargs)`` that
      produces out-of-sample forecasts.
    * ``results`` may also expose a method ``get_prediction`` that produces
      both in- and out-of-sample predictions.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from statsmodels.tsa.api import STLForecast
    >>> from statsmodels.tsa.arima.model import ARIMA
    >>> from statsmodels.datasets import macrodata
    >>> ds = macrodata.load_pandas()
    >>> data = np.log(ds.data.m1)
    >>> first_month = 3 * (int(ds.data.quarter[0]) - 1) + 1
    >>> base_date = f"{int(ds.data.year[0])}-{first_month}-1"
    >>> data.index = pd.date_range(base_date, periods=data.shape[0], freq="QS")

    Generate forecasts from an ARIMA

    >>> stlf = STLForecast(data, ARIMA, model_kwargs={"order": (2, 1, 0)})
    >>> res = stlf.fit()
    >>> forecasts = res.forecast(12)

    Generate forecasts from an Exponential Smoothing model with trend

    >>> from statsmodels.tsa.statespace import exponential_smoothing
    >>> ES = exponential_smoothing.ExponentialSmoothing
    >>> config = {"trend": True}
    >>> stlf = STLForecast(data, ES, model_kwargs=config)
    >>> res = stlf.fit()
    >>> forecasts = res.forecast(12)
    """
    def __init__(
        self,
        endog,
        model,
        *,
        model_kwargs=None,
        period=None,
        seasonal=7,
        trend=None,
        low_pass=None,
        seasonal_deg=1,
        trend_deg=1,
        low_pass_deg=1,
        robust=False,
        seasonal_jump=1,
        trend_jump=1,
        low_pass_jump=1,
    ):
        # Everything STL-related is collected once and forwarded verbatim to
        # ``STL`` when ``fit`` is called.
        stl_options = {
            "period": period,
            "seasonal": seasonal,
            "trend": trend,
            "low_pass": low_pass,
            "seasonal_deg": seasonal_deg,
            "trend_deg": trend_deg,
            "low_pass_deg": low_pass_deg,
            "robust": robust,
            "seasonal_jump": seasonal_jump,
            "trend_jump": trend_jump,
            "low_pass_jump": low_pass_jump,
        }
        self._endog = endog
        self._stl_kwargs = stl_options
        self._model = model
        if model_kwargs is None:
            model_kwargs = {}
        self._model_kwargs = model_kwargs
        # Only the presence of ``fit`` can be checked up front; the results
        # object is checked for ``forecast`` after estimation.
        if not hasattr(model, "fit"):
            raise AttributeError("model must expose a ``fit``  method.")

    @Substitution(fit_params=indent(_fit_params, " " * 8))
    def fit(self, *, inner_iter=None, outer_iter=None, fit_kwargs=None):
        """
        Estimate STL and forecasting model parameters.

        Parameters
        ----------\n%(fit_params)s
        fit_kwargs : Dict[str, Any]
            Any additional keyword arguments to pass to ``model``'s ``fit``
            method when estimating the model on the deseasonalized series
            (STL trend plus STL residual).

        Returns
        -------
        STLForecastResults
            Results with forecasting methods.

        Raises
        ------
        AttributeError
            If the fitted model's results do not expose ``forecast``.
        """
        if fit_kwargs is None:
            fit_kwargs = {}

        decomposer = STL(self._endog, **self._stl_kwargs)
        decomposition = decomposer.fit(
            inner_iter=inner_iter, outer_iter=outer_iter
        )

        # Drop only the seasonal part: the forecasting model sees the trend
        # together with the remainder.
        deseasonalized = decomposition.trend + decomposition.resid
        forecaster = self._model(deseasonalized, **self._model_kwargs)
        fitted = forecaster.fit(**fit_kwargs)

        if hasattr(fitted, "forecast"):
            return STLForecastResults(
                decomposer, decomposition, forecaster, fitted, self._endog
            )
        raise AttributeError(
            "The model's result must expose a ``forecast`` method.")
Beispiel #3
0
class TreatmentEffect(object):
    """Estimate average treatment effect under conditional independence

    This class estimates treatment effect and potential outcome using 5
    different methods, ipw, ra, aipw, aipw-wls, ipw-ra.
    Standard errors and inference are based on the joint GMM representation of
    selection or treatment model, outcome model and effect functions.

    Parameters
    ----------
    model : instance of a model class
        The model class should contain endog and exog for the outcome model.
    treatment : ndarray
        indicator array for observations with treatment (1) or without (0)
    results_select : results instance
        The results instance for the treatment or selection model.
    _cov_type : "HC0"
        Internal keyword. The keyword does not affect GMMResults which always
        corresponds to HC0 standard errors.
    kwds : keyword arguments
        currently not used

    Notes
    -----
    The outcome model is currently limited to a linear model based on OLS or
    WLS.
    Other outcome models, like Logit and Poisson, will become available in
    future.

    """
    def __init__(self,
                 model,
                 treatment,
                 results_select=None,
                 _cov_type="HC0",
                 **kwds):
        # Note _cov_type is only for preliminary estimators,
        # cov in GMM alwasy corresponds to HC0
        self.__dict__.update(kwds)  # currently not used
        self.treatment = np.asarray(treatment)
        self.treat_mask = treat_mask = (treatment == 1)

        if results_select is not None:
            self.results_select = results_select
            self.prob_select = results_select.predict()

        self.model_pool = model
        endog = model.endog
        exog = model.exog
        self.nobs = endog.shape[0]
        self._cov_type = _cov_type

        # no init keys are supported
        mod0 = model.__class__(endog[~treat_mask], exog[~treat_mask])
        self.results0 = mod0.fit(cov_type=_cov_type)
        mod1 = model.__class__(endog[treat_mask], exog[treat_mask])
        self.results1 = mod1.fit(cov_type=_cov_type)
        # self.predict_mean0 = self.model_pool.predict(self.results0.params
        #                                             ).mean()
        # self.predict_mean1 = self.model_pool.predict(self.results1.params
        #                                             ).mean()

        self.exog_grouped = np.concatenate((mod0.exog, mod1.exog), axis=0)
        self.endog_grouped = np.concatenate((mod0.endog, mod1.endog), axis=0)

    @classmethod
    def from_data(cls, endog, exog, treatment, model='ols', **kwds):
        """create models from data

        not yet implemented

        """
        raise NotImplementedError

    def ipw(self, return_results=True, effect_group="all", disp=False):
        """Inverse Probability Weighted treatment effect estimation.

        Parameters
        ----------
        return_results : bool
            If True, then a results instance is returned.
            If False, just ATE, POM0 and POM1 are returned.
        effect_group : {"all", 0, 1}
            ``effectgroup`` determines for which population the effects are
            estimated.
            If effect_group is "all", then sample average treatment effect and
            potential outcomes are returned
            If effect_group is 1 or "treated", then effects on treated are
            returned.
            If effect_group is 0, "treated" or "control", then effects on
            untreated, i.e. control group, are returned.
        disp : bool
            Indicates whether the scipy optimizer should display the
            optimization results

        Returns
        -------
        TreatmentEffectsResults instance or tuple (ATE, POM0, POM1)

        See Also
        --------
        TreatmentEffectsResults
        """
        endog = self.model_pool.endog
        tind = self.treatment
        prob = self.prob_select
        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
            effect_group = 0  # standardize effect_group name
        elif isinstance(effect_group, np.ndarray):
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        res_ipw = ate_ipw(endog, tind, prob, weighted=True, probt=probt)

        if not return_results:
            return res_ipw

        # gmm = _TEGMMGeneric1(endog, self.results_select, _mom_ols_te,
        #                     probt=probt)
        gmm = _IPWGMM(endog,
                      self.results_select,
                      None,
                      teff=self,
                      effect_group=effect_group)
        start_params = np.concatenate(
            (res_ipw[:2], self.results_select.params))
        res_gmm = gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={
                "maxiter": 5000,
                "disp": disp
            },
            maxiter=1,
        )

        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group=effect_group,
        )
        return res

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ra(self, return_results=True, effect_group="all", disp=False):
        """
        Regression Adjustment treatment effect estimation.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults
        """
        # need indicator for reordered observations
        tind = np.zeros(len(self.treatment))
        tind[-self.treatment.sum():] = 1
        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = tind
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - tind
            effect_group = 0  # standardize effect_group name
        elif isinstance(effect_group, np.ndarray):
            # TODO: do we keep this?
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        exog = self.exog_grouped

        # weight or indicator for effect_group
        if probt is not None:
            cw = (probt / probt.mean())
        else:
            cw = 1

        pom0 = (self.results0.predict(exog) * cw).mean()
        pom1 = (self.results1.predict(exog) * cw).mean()
        if not return_results:
            return pom1 - pom0, pom0, pom1

        endog = self.model_pool.endog
        mod_gmm = _RAGMM(endog,
                         self.results_select,
                         None,
                         teff=self,
                         probt=probt)
        start_params = np.concatenate((
            # ate, tt0.effect,
            [pom1 - pom0, pom0],
            self.results0.params,
            self.results1.params))
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={
                "maxiter": 5000,
                "disp": disp
            },
            maxiter=1,
        )
        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group=effect_group,
        )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """

        nobs = self.nobs
        prob = self.prob_select
        tind = self.treatment
        exog = self.model_pool.exog  # in original order
        correct0 = (self.results0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (self.results1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = self.results0.predict(exog).mean() + correct0
        tmean1 = self.results1.predict(exog).mean() + correct1
        ate = tmean1 - tmean0
        if not return_results:
            return ate, tmean0, tmean1

        endog = self.model_pool.endog
        p2_aipw = np.asarray([ate, tmean0])

        mag_aipw1 = _AIPWGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate(
            (p2_aipw, self.results0.params, self.results1.params,
             self.results_select.params))
        res_gmm = mag_aipw1.fit(start_params=start_params,
                                inv_weights=np.eye(len(start_params)),
                                optim_method='nm',
                                optim_args={
                                    "maxiter": 5000,
                                    "disp": disp
                                },
                                maxiter=1)

        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group="all",
        )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw_wls(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting.

        This uses weighted outcome regression, while `aipw` uses unweighted
        outcome regression.
        Option for effect on treated or on untreated is not available.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults

        """
        n_total = self.nobs
        propensity = self.prob_select

        outcome = self.model_pool.endog
        regressors = self.model_pool.exog
        indicator = self.treatment
        is_treated = self.treat_mask

        # Treated arm: WLS with weights s * (s - 1), where s = t / p.
        scaled1 = indicator / propensity
        weights1 = scaled1 * (scaled1 - 1)
        fit1 = WLS(
            outcome[is_treated],
            regressors[is_treated],
            weights=weights1[is_treated],
        ).fit(cov_type='HC1')
        mean1 = fit1.predict(regressors).mean()

        # Control arm: same construction with s = (1 - t) / (1 - p).
        scaled0 = (1 - indicator) / (1 - propensity)
        weights0 = scaled0 * (scaled0 - 1)
        fit0 = WLS(
            outcome[~is_treated],
            regressors[~is_treated],
            weights=weights0[~is_treated],
        ).fit(cov_type='HC1')
        mean0 = fit0.predict(regressors).mean()

        # The weighted outcome fits are kept on the instance.
        self.results_ipwwls0 = fit0
        self.results_ipwwls1 = fit1

        # Inverse-probability-weighted residual corrections per arm.
        correct0 = (fit0.resid / (1 - propensity[indicator == 0])).sum()
        correct0 = correct0 / n_total
        correct1 = (fit1.resid / (propensity[indicator == 1])).sum()
        correct1 = correct1 / n_total
        pom0 = mean0 + correct0
        pom1 = mean1 + correct1
        ate = pom1 - pom0

        if not return_results:
            return ate, pom0, pom1

        # GMM
        gmm_model = _AIPWWLSGMM(
            outcome, self.results_select, None, teff=self
        )
        start_params = np.concatenate((
            np.asarray([ate, pom0]).squeeze(),
            fit0.params,
            fit1.params,
            self.results_select.params,
        ))
        optimizer_options = {"maxiter": 5000, "disp": disp}
        res_gmm = gmm_model.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args=optimizer_options,
            maxiter=1,
        )
        # NOTE(review): the method label passed here is "IPW" although this is
        # the AIPW-WLS estimator -- confirm whether intended.
        return TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group="all",
        )

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ipw_ra(self, return_results=True, effect_group="all", disp=False):
        """
        ATE and POM from inverse probability weighted regression adjustment.

        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """
        treat_mask = self.treat_mask
        endog = self.model_pool.endog
        exog = self.model_pool.exog
        prob = self.prob_select

        prob0 = prob[~treat_mask]
        prob1 = prob[treat_mask]
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            exogt = exog
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            exogt = exog[treat_mask]
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1
            exogt = exog[~treat_mask]
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        mod0 = WLS(endog[~treat_mask], exog[~treat_mask], weights=w0)
        result0 = mod0.fit(cov_type='HC1')
        # mean0_ipwra = (result0.predict(exog) * (prob / prob.mean())).mean()
        mean0_ipwra = result0.predict(exogt).mean()

        mod1 = WLS(endog[treat_mask], exog[treat_mask], weights=w1)
        result1 = mod1.fit(cov_type='HC1')
        # mean1_ipwra = (result1.predict(exog) * (prob / prob.mean())).mean()
        mean1_ipwra = result1.predict(exogt).mean()

        if not return_results:
            return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra

        # GMM
        mod_gmm = _IPWRAGMM(endog,
                            self.results_select,
                            None,
                            teff=self,
                            effect_group=effect_group)
        start_params = np.concatenate(
            ([mean1_ipwra - mean0_ipwra,
              mean0_ipwra], result0.params, result1.params,
             np.asarray(self.results_select.params)))
        res_gmm = mod_gmm.fit(start_params=start_params,
                              inv_weights=np.eye(len(start_params)),
                              optim_method='nm',
                              optim_args={
                                  "maxiter": 2000,
                                  "disp": disp
                              },
                              maxiter=1)

        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group=effect_group,
        )
        return res