def added_variable_resids(results, focus_exog, resid_type=None, use_glm_weights=True, fit_kwargs=None): """ Residualize the endog variable and a 'focus' exog variable in a regression model with respect to the other exog variables. Parameters ---------- results : regression results instance A fitted model including the focus exog and all other predictors of interest. focus_exog : integer or string The column of results.model.exog or a variable name that is to be residualized against the other predictors. resid_type : string The type of residuals to use for the dependent variable. If None, uses `resid_deviance` for GLM/GEE and `resid` otherwise. use_glm_weights : bool Only used if the model is a GLM or GEE. If True, the residuals for the focus predictor are computed using WLS, with the weights obtained from the IRLS calculations for fitting the GLM. If False, unweighted regression is used. fit_kwargs : dict, optional Keyword arguments to be passed to fit when refitting the model. Returns ------- endog_resid : array-like The residuals for the original exog focus_exog_resid : array-like The residuals for the focus predictor Notes ----- The 'focus variable' residuals are always obtained using linear regression. Currently only GLM, GEE, and OLS models are supported. """ model = results.model if not isinstance(model, (GEE, GLM, OLS)): raise ValueError( "model type %s not supported for added variable residuals" % model.__class__.__name__) exog = model.exog endog = model.endog focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model) focus_exog_vals = exog[:, focus_col] # Default residuals if resid_type is None: if isinstance(model, (GEE, GLM)): resid_type = "resid_deviance" else: resid_type = "resid" ii = range(exog.shape[1]) ii = list(ii) ii.pop(focus_col) reduced_exog = exog[:, ii] start_params = results.params[ii] klass = model.__class__ kwargs = model._get_init_kwds() new_model = klass(endog, reduced_exog, **kwargs) args = {"start_params": start_params} if fit_kwargs is not None: args.update(fit_kwargs) new_result = new_model.fit(**args) if not new_result.converged: raise ValueError( "fit did not converge when calculating added variable residuals") try: endog_resid = getattr(new_result, resid_type) except AttributeError: raise ValueError("'%s' residual type not available" % resid_type) import statsmodels.regression.linear_model as lm if isinstance(model, (GLM, GEE)) and use_glm_weights: weights = model.family.weights(results.fittedvalues) if hasattr(model, "data_weights"): weights = weights * model.data_weights lm_results = lm.WLS(focus_exog_vals, reduced_exog, weights).fit() else: lm_results = lm.OLS(focus_exog_vals, reduced_exog).fit() focus_exog_resid = lm_results.resid return endog_resid, focus_exog_resid
def ceres_resids(results, focus_exog, frac=0.66, cond_means=None): """ Calculate the CERES residuals (Conditional Expectation Partial Residuals) for a fitted model. Parameters ---------- results : model results instance The fitted model for which the CERES residuals are calculated. focus_exog : int The column of results.model.exog used as the 'focus variable'. frac : float, optional Lowess smoothing parameter for estimating the conditional means. Not used if `cond_means` is provided. cond_means : array-like, optional If provided, the columns of this array are the conditional means E[exog | focus exog], where exog ranges over some or all of the columns of exog other than focus exog. If this is an empty nx0 array, the conditional means are treated as being zero. If None, the conditional means are estimated. Returns ------- An array containing the CERES residuals. Notes ----- If `cond_means` is not provided, it is obtained by smoothing each column of exog (except the focus column) against the focus column. Currently only supports GLM, GEE, and OLS models. """ model = results.model if not isinstance(model, (GLM, GEE, OLS)): raise ValueError("ceres residuals not available for %s" % model.__class__.__name__) focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model) # Indices of non-focus columns ix_nf = range(len(results.params)) ix_nf = list(ix_nf) ix_nf.pop(focus_col) nnf = len(ix_nf) # Estimate the conditional means if not provided. if cond_means is None: # Below we calculate E[x | focus] where x is each column other # than the focus column. We don't want the intercept when we do # this so we remove it here. pexog = model.exog[:, ix_nf] pexog -= pexog.mean(0) u, s, vt = np.linalg.svd(pexog, 0) ii = np.flatnonzero(s > 1e-6) pexog = u[:, ii] fcol = model.exog[:, focus_col] cond_means = np.empty((len(fcol), pexog.shape[1])) for j in range(pexog.shape[1]): # Get the fitted values for column i given the other # columns (skip the intercept). y0 = pexog[:, j] cf = lowess(y0, fcol, frac=frac, return_sorted=False) cond_means[:, j] = cf new_exog = np.concatenate((model.exog[:, ix_nf], cond_means), axis=1) # Refit the model using the adjusted exog values klass = model.__class__ init_kwargs = model._get_init_kwds() new_model = klass(model.endog, new_exog, **init_kwargs) new_result = new_model.fit() # The partial residual, with respect to l(x2) (notation of Cook 1998) presid = model.endog - new_result.fittedvalues if isinstance(model, (GLM, GEE)): presid *= model.family.link.deriv(new_result.fittedvalues) if new_exog.shape[1] > nnf: presid += np.dot(new_exog[:, nnf:], new_result.params[nnf:]) return presid
def ceres_resids(results, focus_exog, frac=None, cond_means=None): """ Calculate the CERES residuals (Conditional Expectation Partial Residuals) for a fitted model. Parameters ---------- results : model results instance The fitted model for which the CERES residuals are calculated. focus_exog : int The column of results.model.exog used as the 'focus variable'. frac : dict, optional Map from column indices of results.model.exog to lowess smoothing parameters (the frac keyword argument to lowess). Not used if `cond_means` is provided. cond_means : array-like, optional If provided, the columns of this array are the conditional means E[exog | focus exog], where exog ranges over some or all of the columns of exog other than focus exog. If this is an empty nx0 array, the conditional means are treated as being zero. Returns ------- An array containing the CERES residuals. Notes ----- If `cond_means` is not provided, it is obtained by smoothing each column of exog (except the focus column) against the focus column. The values of `frac` control these lowess smooths. Currently only supports GLM, GEE, and OLS models. """ model = results.model if not isinstance(model, (GLM, GEE, OLS)): raise ValueError("ceres residuals not available for %s" % type(model)) n = model.exog.shape[0] m = model.exog.shape[1] - 1 if frac is None: frac = {} if type(focus_exog) is str: focus_col = results.exog_names.index(focus_exog) else: focus_col = focus_exog # Indices of non-focus columns ii = range(len(results.params)) ii = list(ii) ii.pop(focus_col) if cond_means is None: cond_means = np.zeros((n, m)) x0 = model.exog[:, focus_col] for j, i in enumerate(ii): y0 = model.exog[:, i] fr = frac[i] if i in frac else 0.66 cond_means[:, j] = lowess(y0, x0, frac=fr, return_sorted=False) new_exog = np.concatenate((model.exog[:, ii], cond_means), axis=1) # Refit the model using the adjusted exog values klass = model.__class__ init_kwargs = model._get_init_kwds() new_model = klass(model.endog, new_exog, **init_kwargs) new_result = new_model.fit() # The partial residual, with respect to l(x2) (notation of Cook 1998) presid = model.endog - new_result.fittedvalues if isinstance(model, (GLM, GEE)): presid *= model.family.link.deriv(new_result.fittedvalues) if cond_means.shape[1] > 0: presid += np.dot(cond_means, new_result.params[m:]) return presid
def added_variable_resids(results, focus_exog, resid_type=None, use_glm_weights=True, fit_kwargs=None): """ Residualize the endog variable and a 'focus' exog variable in a regression model with respect to the other exog variables. Parameters ---------- results : regression results instance A fitted model including the focus exog and all other predictors of interest. focus_exog : integer or string The column of results.model.exog or a variable name that is to be residualized against the other predictors. resid_type : string The type of residuals to use for the dependent variable. If None, uses `resid_deviance` for GLM/GEE and `resid` otherwise. use_glm_weights : bool Only used if the model is a GLM or GEE. If True, the residuals for the focus predictor are computed using WLS, with the weights obtained from the IRLS calculations for fitting the GLM. If False, unweighted regression is used. fit_kwargs : dict, optional Keyword arguments to be passed to fit when refitting the model. Returns ------- endog_resid : array-like The residuals for the original exog focus_exog_resid : array-like The residuals for the focus predictor Notes ----- The 'focus variable' residuals are always obtained using linear regression. Currently only GLM, GEE, and OLS models are supported. """ model = results.model if not isinstance(model, (GEE, GLM, OLS)): raise ValueError("model type %s not supported for added variable residuals" % model.__class__.__name__) exog = model.exog endog = model.endog focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model) focus_exog_vals = exog[:, focus_col] # Default residuals if resid_type is None: if isinstance(model, (GEE, GLM)): resid_type = "resid_deviance" else: resid_type = "resid" ii = range(exog.shape[1]) ii = list(ii) ii.pop(focus_col) reduced_exog = exog[:, ii] start_params = results.params[ii] klass = model.__class__ kwargs = model._get_init_kwds() new_model = klass(endog, reduced_exog, **kwargs) args = {"start_params": start_params} if fit_kwargs is not None: args.update(fit_kwargs) new_result = new_model.fit(**args) if not new_result.converged: raise ValueError("fit did not converge when calculating added variable residuals") try: endog_resid = getattr(new_result, resid_type) except AttributeError: raise ValueError("'%s' residual type not available" % resid_type) import statsmodels.regression.linear_model as lm if isinstance(model, (GLM, GEE)) and use_glm_weights: weights = model.family.weights(results.fittedvalues) if hasattr(model, "data_weights"): weights = weights * model.data_weights lm_results = lm.WLS(focus_exog_vals, reduced_exog, weights).fit() else: lm_results = lm.OLS(focus_exog_vals, reduced_exog).fit() focus_exog_resid = lm_results.resid return endog_resid, focus_exog_resid