"trend", "low_pass", "seasonal_deg", "trend_deg", "low_pass_deg", "robust", "seasonal_jump", "trend_jump", "low_pass_jump", ]) ds = Docstring(STL.fit.__doc__) _fit_params = ds.extract_parameters(["inner_iter", "outer_iter"]) @Substitution(stl_forecast_params=indent(_stl_forecast_params, " ")) class STLForecast: r""" Model-based forecasting using STL to remove seasonality Forecasts are produced by first subtracting the seasonality estimated using STL, then forecasting the deseasonalized data using a time-series model, for example, ARIMA. Parameters ---------- %(stl_forecast_params)s See Also -------- statsmodels.tsa.arima.model.ARIMA
class STLForecast:
    r"""
    Model-based forecasting using STL to remove seasonality

    Forecasts are produced by first subtracting the seasonality
    estimated using STL, then forecasting the deseasonalized
    data using a time-series model, for example, ARIMA.

    Parameters
    ----------
%(stl_forecast_params)s

    See Also
    --------
    statsmodels.tsa.arima.model.ARIMA
        ARIMA modeling.
    statsmodels.tsa.ar_model.AutoReg
        Autoregressive modeling supporting complex deterministics.
    statsmodels.tsa.exponential_smoothing.ets.ETSModel
        Additive and multiplicative exponential smoothing with trend.
    statsmodels.tsa.statespace.exponential_smoothing.ExponentialSmoothing
        Additive exponential smoothing with trend.

    Notes
    -----
    If :math:`\hat{S}_t` is the seasonal component, then the deseasonalized
    series is constructed as

    .. math::

        Y_t - \hat{S}_t

    The trend component is not removed, and so the time series model should
    be capable of adequately fitting and forecasting the trend if present.
    The out-of-sample forecasts of the seasonal component are produced as

    .. math::

        \hat{S}_{T + h} = \hat{S}_{T - k}

    where :math:`k = m - h + m \lfloor (h-1)/m \rfloor` tracks the period
    offset in the full cycle of 1, 2, ..., m where m is the period length.

    This class is mostly a convenience wrapper around ``STL`` and a
    user-specified model. The model is assumed to follow the standard
    statsmodels pattern:

    * ``fit`` is used to estimate parameters and returns a results instance,
      ``results``.
    * ``results`` must expose a method ``forecast(steps, **kwargs)`` that
      produces out-of-sample forecasts.
    * ``results`` may also expose a method ``get_prediction`` that produces
      both in- and out-of-sample predictions.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from statsmodels.tsa.api import STLForecast
    >>> from statsmodels.tsa.arima.model import ARIMA
    >>> from statsmodels.datasets import macrodata
    >>> ds = macrodata.load_pandas()
    >>> data = np.log(ds.data.m1)

    Quarter ``q`` starts in month ``3*q - 2`` (Q1 -> January).

    >>> base_date = f"{int(ds.data.year[0])}-{3*int(ds.data.quarter[0])-2}-1"
    >>> data.index = pd.date_range(base_date, periods=data.shape[0], freq="QS")

    Generate forecasts from an ARIMA

    >>> stlf = STLForecast(data, ARIMA, model_kwargs={"order": (2, 1, 0)})
    >>> res = stlf.fit()
    >>> forecasts = res.forecast(12)

    Generate forecasts from an Exponential Smoothing model with trend

    >>> from statsmodels.tsa.statespace import exponential_smoothing
    >>> ES = exponential_smoothing.ExponentialSmoothing
    >>> config = {"trend": True}
    >>> stlf = STLForecast(data, ES, model_kwargs=config)
    >>> res = stlf.fit()
    >>> forecasts = res.forecast(12)
    """

    def __init__(
        self,
        endog,
        model,
        *,
        model_kwargs=None,
        period=None,
        seasonal=7,
        trend=None,
        low_pass=None,
        seasonal_deg=1,
        trend_deg=1,
        low_pass_deg=1,
        robust=False,
        seasonal_jump=1,
        trend_jump=1,
        low_pass_jump=1,
    ):
        # Fail fast, before any state is stored, when the model cannot be
        # estimated at all.
        if not hasattr(model, "fit"):
            raise AttributeError("model must expose a ``fit`` method.")
        self._endog = endog
        # Everything except endog/model/model_kwargs is forwarded to STL
        # unchanged when ``fit`` is called.
        self._stl_kwargs = dict(
            period=period,
            seasonal=seasonal,
            trend=trend,
            low_pass=low_pass,
            seasonal_deg=seasonal_deg,
            trend_deg=trend_deg,
            low_pass_deg=low_pass_deg,
            robust=robust,
            seasonal_jump=seasonal_jump,
            trend_jump=trend_jump,
            low_pass_jump=low_pass_jump,
        )
        self._model = model
        self._model_kwargs = {} if model_kwargs is None else model_kwargs

    @Substitution(fit_params=indent(_fit_params, " " * 8))
    def fit(self, *, inner_iter=None, outer_iter=None, fit_kwargs=None):
        """
        Estimate STL and forecasting model parameters.

        Parameters
        ----------\n%(fit_params)s
        fit_kwargs : Dict[str, Any]
            Any additional keyword arguments to pass to ``model``'s ``fit``
            method when estimating the model on the deseasonalized data
            (STL trend plus STL residual).

        Returns
        -------
        STLForecastResults
            Results with forecasting methods.

        Raises
        ------
        AttributeError
            If the results returned by the model's ``fit`` do not expose a
            ``forecast`` method.
        """
        fit_kwargs = {} if fit_kwargs is None else fit_kwargs
        stl = STL(self._endog, **self._stl_kwargs)
        stl_fit: DecomposeResult = stl.fit(
            inner_iter=inner_iter, outer_iter=outer_iter
        )
        # Only the seasonal component is removed; the forecasting model is
        # responsible for the trend.
        model_endog = stl_fit.trend + stl_fit.resid
        mod = self._model(model_endog, **self._model_kwargs)
        res = mod.fit(**fit_kwargs)
        if not hasattr(res, "forecast"):
            raise AttributeError(
                "The model's result must expose a ``forecast`` method."
            )
        return STLForecastResults(stl, stl_fit, mod, res, self._endog)
class TreatmentEffect:
    """Estimate average treatment effect under conditional independence

    This class estimates treatment effect and potential outcome using 5
    different methods, ipw, ra, aipw, aipw-wls, ipw-ra.
    Standard errors and inference are based on the joint GMM representation
    of selection or treatment model, outcome model and effect functions.

    Parameters
    ----------
    model : instance of a model class
        The model class should contain endog and exog for the outcome model.
    treatment : array_like
        indicator array for observations with treatment (1) or without (0)
    results_select : results instance
        The results instance for the treatment or selection model. If it is
        None, then the attributes ``results_select`` and ``prob_select`` are
        not set.
    _cov_type : "HC0"
        Internal keyword. The keyword does not affect GMMResults which
        always corresponds to HC0 standard errors.
    kwds : keyword arguments
        currently not used

    Notes
    -----
    The outcome model is currently limited to a linear model based on OLS
    or WLS.
    Other outcome models, like Logit and Poisson, will become available in
    future.
    """

    def __init__(self, model, treatment, results_select=None, _cov_type="HC0",
                 **kwds):
        # Note: _cov_type is only for the preliminary estimators,
        # cov in GMM always corresponds to HC0
        self.__dict__.update(kwds)  # currently not used
        self.treatment = np.asarray(treatment)
        # Compare the array form, not the raw argument: for a plain list
        # ``treatment == 1`` is the scalar False instead of a mask.
        self.treat_mask = treat_mask = (self.treatment == 1)

        if results_select is not None:
            self.results_select = results_select
            self.prob_select = results_select.predict()

        self.model_pool = model
        endog = model.endog
        exog = model.exog
        self.nobs = endog.shape[0]
        self._cov_type = _cov_type

        # Separate outcome regressions for control and treated observations;
        # no init keys are supported
        mod0 = model.__class__(endog[~treat_mask], exog[~treat_mask])
        self.results0 = mod0.fit(cov_type=_cov_type)
        mod1 = model.__class__(endog[treat_mask], exog[treat_mask])
        self.results1 = mod1.fit(cov_type=_cov_type)

        # Data reordered so that control rows come first, treated rows last.
        self.exog_grouped = np.concatenate((mod0.exog, mod1.exog), axis=0)
        self.endog_grouped = np.concatenate((mod0.endog, mod1.endog), axis=0)

    @classmethod
    def from_data(cls, endog, exog, treatment, model='ols', **kwds):
        """create models from data

        not yet implemented

        """
        raise NotImplementedError

    @staticmethod
    def _resolve_effect_group(effect_group, weight_treated):
        """Map ``effect_group`` to ``(probt, standardized effect_group)``.

        ``weight_treated`` is the array used for the effect on the treated;
        ``1 - weight_treated`` is used for the effect on the untreated.
        """
        # The array case has to be tested first: comparing an ndarray with
        # the string and integer options below is elementwise and has no
        # single truth value.
        if isinstance(effect_group, np.ndarray):
            return effect_group, "user"
        if effect_group == "all":
            return None, effect_group
        if effect_group in [1, "treated"]:
            return weight_treated, 1
        if effect_group in [0, "untreated", "control"]:
            return 1 - weight_treated, 0
        raise ValueError("incorrect option for effect_group")

    def _fit_gmm(self, mod_gmm, start_params, effect_group, disp,
                 optim_maxiter=5000):
        """Fit ``mod_gmm`` from ``start_params`` and wrap the GMM results."""
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            # identity inverse weight matrix and one GMM iteration
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": optim_maxiter, "disp": disp},
            maxiter=1,
        )
        # NOTE(review): every estimator passes the label "IPW" here, which
        # looks like a copy-paste leftover. It is kept unchanged because the
        # use of the label inside TreatmentEffectResults is not visible from
        # this class - confirm before changing.
        return TreatmentEffectResults(
            self, res_gmm, "IPW",
            start_params=start_params,
            effect_group=effect_group,
        )

    def ipw(self, return_results=True, effect_group="all", disp=False):
        """Inverse Probability Weighted treatment effect estimation.

        Parameters
        ----------
        return_results : bool
            If True, then a results instance is returned.
            If False, just ATE, POM0 and POM1 are returned.
        effect_group : {"all", 0, 1}
            ``effect_group`` determines for which population the effects are
            estimated.
            If effect_group is "all", then sample average treatment effect and
            potential outcomes are returned.
            If effect_group is 1 or "treated", then effects on treated are
            returned.
            If effect_group is 0, "untreated" or "control", then effects on
            untreated, i.e. control group, are returned.
            If effect_group is an ndarray, then it is used directly as
            ``probt``.
        disp : bool
            Indicates whether the scipy optimizer should display the
            optimization results

        Returns
        -------
        TreatmentEffectResults instance or tuple (ATE, POM0, POM1)

        Raises
        ------
        ValueError
            If ``effect_group`` is not one of the supported options.

        See Also
        --------
        TreatmentEffectResults
        """
        endog = self.model_pool.endog
        tind = self.treatment
        prob = self.prob_select
        probt, effect_group = self._resolve_effect_group(effect_group, prob)

        res_ipw = ate_ipw(endog, tind, prob, weighted=True, probt=probt)

        if not return_results:
            return res_ipw

        gmm = _IPWGMM(endog, self.results_select, None, teff=self,
                      effect_group=effect_group)
        start_params = np.concatenate(
            (res_ipw[:2], self.results_select.params))
        return self._fit_gmm(gmm, start_params, effect_group, disp)

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ra(self, return_results=True, effect_group="all", disp=False):
        """
        Regression Adjustment treatment effect estimation.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults
        """
        # Indicator for the reordered observations in ``exog_grouped``: the
        # treated rows are the last ones. Count them from the mask so that
        # float-typed treatment and an empty treated group are handled.
        nobs = len(self.treatment)
        n_treated = int(self.treat_mask.sum())
        tind = np.zeros(nobs)
        tind[nobs - n_treated:] = 1

        # TODO: do we keep the user provided ndarray option?
        probt, effect_group = self._resolve_effect_group(effect_group, tind)

        exog = self.exog_grouped

        # weight or indicator for effect_group
        if probt is not None:
            cw = (probt / probt.mean())
        else:
            cw = 1

        pom0 = (self.results0.predict(exog) * cw).mean()
        pom1 = (self.results1.predict(exog) * cw).mean()
        if not return_results:
            return pom1 - pom0, pom0, pom1

        endog = self.model_pool.endog
        mod_gmm = _RAGMM(endog, self.results_select, None, teff=self,
                         probt=probt)
        start_params = np.concatenate((
            # ate, pom0
            [pom1 - pom0, pom0],
            self.results0.params,
            self.results1.params))
        return self._fit_gmm(mod_gmm, start_params, effect_group, disp)

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults
        """
        nobs = self.nobs
        prob = self.prob_select
        tind = self.treatment
        exog = self.model_pool.exog  # in original order

        # Inverse probability weighted residuals of each outcome regression
        # are added to the regression based means.
        correct0 = (self.results0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (self.results1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = self.results0.predict(exog).mean() + correct0
        tmean1 = self.results1.predict(exog).mean() + correct1
        ate = tmean1 - tmean0
        if not return_results:
            return ate, tmean0, tmean1

        endog = self.model_pool.endog
        p2_aipw = np.asarray([ate, tmean0])

        mod_gmm = _AIPWGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate((
            p2_aipw,
            self.results0.params,
            self.results1.params,
            self.results_select.params))
        return self._fit_gmm(mod_gmm, start_params, "all", disp)

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw_wls(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting.

        This uses weighted outcome regression, while `aipw` uses unweighted
        outcome regression.
        Option for effect on treated or on untreated is not available.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults
        """
        nobs = self.nobs
        prob = self.prob_select

        endog = self.model_pool.endog
        exog = self.model_pool.exog
        tind = self.treatment
        treat_mask = self.treat_mask

        # Weighted outcome regression on the treated observations
        ww1 = tind / prob * (tind / prob - 1)
        mod1 = WLS(endog[treat_mask], exog[treat_mask],
                   weights=ww1[treat_mask])
        result1 = mod1.fit(cov_type='HC1')
        mean1_ipw2 = result1.predict(exog).mean()

        # Weighted outcome regression on the control observations
        ww0 = (1 - tind) / (1 - prob) * ((1 - tind) / (1 - prob) - 1)
        mod0 = WLS(endog[~treat_mask], exog[~treat_mask],
                   weights=ww0[~treat_mask])
        result0 = mod0.fit(cov_type='HC1')
        mean0_ipw2 = result0.predict(exog).mean()

        # Stored on the instance as a side effect of calling this method.
        self.results_ipwwls0 = result0
        self.results_ipwwls1 = result1

        correct0 = (result0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (result1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = mean0_ipw2 + correct0
        tmean1 = mean1_ipw2 + correct1
        ate = tmean1 - tmean0

        if not return_results:
            return ate, tmean0, tmean1

        p2_aipw_wls = np.asarray([ate, tmean0]).squeeze()

        # GMM
        mod_gmm = _AIPWWLSGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate((
            p2_aipw_wls,
            result0.params,
            result1.params,
            self.results_select.params))
        return self._fit_gmm(mod_gmm, start_params, "all", disp)

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ipw_ra(self, return_results=True, effect_group="all", disp=False):
        """
        ATE and POM from inverse probability weighted regression adjustment.

        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults
        """
        treat_mask = self.treat_mask
        endog = self.model_pool.endog
        exog = self.model_pool.exog
        prob = self.prob_select

        prob0 = prob[~treat_mask]
        prob1 = prob[treat_mask]
        # Regression weights for each arm and the rows over which the
        # predictions are averaged depend on the target population.
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            exogt = exog
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            exogt = exog[treat_mask]
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1
            exogt = exog[~treat_mask]
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        mod0 = WLS(endog[~treat_mask], exog[~treat_mask], weights=w0)
        result0 = mod0.fit(cov_type='HC1')
        mean0_ipwra = result0.predict(exogt).mean()

        mod1 = WLS(endog[treat_mask], exog[treat_mask], weights=w1)
        result1 = mod1.fit(cov_type='HC1')
        mean1_ipwra = result1.predict(exogt).mean()

        if not return_results:
            return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra

        # GMM
        mod_gmm = _IPWRAGMM(endog, self.results_select, None, teff=self,
                            effect_group=effect_group)
        start_params = np.concatenate((
            [mean1_ipwra - mean0_ipwra, mean0_ipwra],
            result0.params,
            result1.params,
            np.asarray(self.results_select.params)))
        # This estimator uses a smaller optimizer iteration limit than the
        # other methods.
        return self._fit_gmm(mod_gmm, start_params, effect_group, disp,
                             optim_maxiter=2000)