def summary(self, title=None, alpha=.05):
    """
    Summarize the results of running MICE.

    Parameters
    ----------
    title : string, optional
        Title for the top table. If not None, then this replaces
        the default title
    alpha : float
        Significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        This holds the summary tables and text, which can be
        printed or converted to various output formats.
    """
    # Use the standard-library OrderedDict rather than the
    # ``statsmodels.compat.collections`` shim, so this method does not
    # depend on a legacy compatibility module being importable.
    from collections import OrderedDict

    from statsmodels.iolib import summary2

    smry = summary2.Summary()
    float_format = "%8.3f"

    # Header table: insertion order is the display order.
    info = OrderedDict()
    info["Method:"] = "MICE"
    info["Model:"] = self.model_class.__name__
    info["Dependent variable:"] = self.endog_names
    info["Sample size:"] = "%d" % self.model.data.data.shape[0]
    info["Scale"] = "%.2f" % self.scale
    info["Num. imputations"] = "%d" % len(self.model.results_list)
    smry.add_dict(info, align='l', float_format=float_format)

    # Parameter table, extended with the fraction of missing information.
    param = summary2.summary_params(self, alpha=alpha)
    param["FMI"] = self.frac_miss_info
    smry.add_df(param, float_format=float_format)

    smry.add_title(title=title, results=self)

    return smry
def summary(self, title=None, alpha=.05):
    """
    Summarize the results of running MICE.

    Parameters
    ----------
    title : string, optional
        Title for the top table. If not None, then this replaces
        the default title
    alpha : float
        Significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        This holds the summary tables and text, which can be
        printed or converted to various output formats.
    """
    # The standard library provides OrderedDict directly; importing it
    # from there avoids relying on the ``statsmodels.compat.collections``
    # compatibility module.
    from collections import OrderedDict

    from statsmodels.iolib import summary2

    float_format = "%8.3f"
    smry = summary2.Summary()

    # Header entries, listed in the order they should be displayed.
    info = OrderedDict([
        ("Method:", "MICE"),
        ("Model:", self.model_class.__name__),
        ("Dependent variable:", self.endog_names),
        ("Sample size:", "%d" % self.model.data.data.shape[0]),
        ("Scale", "%.2f" % self.scale),
        ("Num. imputations", "%d" % len(self.model.results_list)),
    ])
    smry.add_dict(info, align='l', float_format=float_format)

    # Coefficient table plus a column for the fraction of missing
    # information.
    param = summary2.summary_params(self, alpha=alpha)
    param["FMI"] = self.frac_miss_info
    smry.add_df(param, float_format=float_format)

    smry.add_title(title=title, results=self)

    return smry
def summary(self, title=None, alpha=.05):
    """
    Build a summary of the multiple imputation results.

    Parameters
    ----------
    title : str, optional
        Title for the top table. When given (not None) it is used
        in place of the default title.
    alpha : float
        Significance level used for the confidence intervals.

    Returns
    -------
    smry : Summary instance
        Container for the summary tables and text; it can be printed
        or converted to various output formats.
    """
    from statsmodels.iolib import summary2

    number_format = "%8.3f"
    report = summary2.Summary()

    # Top table: description of the fit, in display order.
    header = {
        "Method:": "MI",
        "Model:": self.mi.model.__name__,
        "Dependent variable:": self._model.endog_names,
        "Sample size:": "%d" % self.mi.imp.data.shape[0],
        "Num. imputations": "%d" % self.mi.nrep,
    }
    report.add_dict(header, align='l', float_format=number_format)

    # Parameter table with the ``fmi`` attribute appended as an extra
    # column labelled "FMI".
    coef_table = summary2.summary_params(self, alpha=alpha)
    coef_table["FMI"] = self.fmi
    report.add_df(coef_table, float_format=number_format)

    report.add_title(title=title, results=self)
    return report
def col_params(result, est_fmt='{:.3f}', sig_fmt='({:.2f})', est_scale=100.0,
               sig_scale=1.0, stars=True, use_t=True, xname=None, **kwargs):
    """Stack coefficients and significance stats in single column.

    Parameters
    ----------
    result : results instance
        Passed to ``summary_params``; ``result.model.endog_names`` is used
        as the name of the output column.
    est_fmt : str
        ``str.format`` template applied to each scaled estimate.
    sig_fmt : str
        ``str.format`` template applied to each scaled significance
        statistic (parenthesised by default).
    est_scale : float
        Multiplier applied to the estimates before formatting.
    sig_scale : float
        Multiplier applied to the significance statistic before formatting.
    stars : bool
        If True, append one ``*`` to the estimate for each of the
        thresholds .1, .05 and .01 that the value in the fourth column of
        the parameter table falls below.
    use_t : bool
        If True the third column of the parameter table (t-values) is
        shown under the estimate, otherwise the fourth column.
    xname : list, optional
        Forwarded to ``summary_params``. ``None`` (the default) is treated
        as an empty list.
    **kwargs
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        Single-column frame with the estimate and the significance
        statistic stacked for every parameter.
    """
    # Avoid a shared mutable default while still handing summary_params
    # the empty list it received before.
    if xname is None:
        xname = []

    # Extract parameters
    res = summary_params(result, xname=xname)
    est_col, sig_col = res.columns[:2]

    # Read the numeric inputs before any column is overwritten with text.
    # NOTE(review): column 3 is presumably the p-value column -- confirm
    # against summary_params.
    shown_stat = res.iloc[:, 2] if use_t else res.iloc[:, 3]
    pvalues = res.iloc[:, 3]

    estimates = [est_fmt.format(x * est_scale) for x in res.iloc[:, 0]]

    # Significance stars: one per threshold crossed. Built as plain
    # strings and assigned as a whole column, because chained assignment
    # such as ``res.iloc[:, 0][idx] = ...`` writes to a temporary object
    # and leaves ``res`` unchanged under pandas Copy-on-Write.
    if stars:
        estimates = [
            est + '*' * sum(1 for level in (.1, .05, .01) if p < level)
            for est, p in zip(estimates, pvalues)
        ]

    res[est_col] = estimates
    # Significance statistic in parentheses (per ``sig_fmt``)
    res[sig_col] = [sig_fmt.format(x * sig_scale) for x in shown_stat]

    # Stack Coefs and Signif.
    res = res.iloc[:, :2]
    res = res.stack()
    res = pd.DataFrame(res)
    res.columns = [str(result.model.endog_names)]
    return res
def _update_statsmodel_result_summary_after_cov_matrix_changed(result):
    """
    Refresh the param/stderr section of ``result``'s summaries.

    The parameter table is recomputed with ``summary_params`` and swapped
    into slot 1 of the objects returned by ``result.summary()`` and
    ``result.summary2()``. Both methods are then rebound on ``result`` to
    zero-argument callables that return the patched objects.

    Note: inplace
    """
    # Freshly computed param/stderr frame
    params_frame = summary_params(result)

    # ``summary`` stores its tables as SimpleTable objects, so wrap the
    # frame accordingly.
    table_kwargs = {
        "headers": list(params_frame.columns),
        "stubs": list(params_frame.index),
        "txt_fmt": fmt_params,
    }
    params_table = SimpleTable(params_frame.values, **table_kwargs)

    # Patch the classic summary and make ``result.summary`` hand it back
    patched_summary = result.summary()
    patched_summary.tables[1] = params_table
    result.summary = lambda: patched_summary

    # Same for summary2, which holds the DataFrame itself rather than a
    # SimpleTable
    patched_summary2 = result.summary2()
    patched_summary2.tables[1] = params_frame
    result.summary2 = lambda: patched_summary2
def _significance_suffix(pvalue):
    """Return the star-marker suffix ('_3stars', '_2stars', '_1star' or '') for a p-value."""
    if pvalue <= 0.01:
        return '_3stars'
    if pvalue <= 0.05:
        return '_2stars'
    if pvalue <= 0.10:
        return '_1star'
    # Not significant at 10% (also reached for NaN, where every
    # comparison above is False).
    return ''


def produce_reg_df(model, model_name, panel, reg_type='ols'):
    """
    Fit a regression and return its results as one formatted table column.

    Parameters
    ----------
    model : formula
        Passed to ``dmatrices`` together with ``panel`` to build the
        endogenous and exogenous matrices.
    model_name : str
        Name of the single column of the returned frame.
    panel : data container
        Data handed to ``dmatrices``.
    reg_type : {'ols', 'probit'}
        'ols' fits ``sm.OLS`` and reports coefficients with HC0 standard
        errors and adjusted R2; 'probit' fits ``sm.Probit`` and reports
        marginal effects and the pseudo R2.

    Returns
    -------
    pandas.DataFrame
        For every regressor three stacked rows ('Coef.' with a star
        marker suffix, 'Std.Err.' in parentheses, and an empty
        'addlinespace' row), followed by 'Observations' and 'R2'.

    Raises
    ------
    ValueError
        If ``reg_type`` is neither 'ols' nor 'probit'.
    """
    y, x = dmatrices(model, panel)

    if reg_type == 'ols':
        results = sm.OLS(y, x).fit()
        # Copy so the column assignments below act on an independent frame
        # rather than on a selection of summary_params' output.
        estimates = summary_params(results)[
            ['Coef.', 'Std.Err.', 'P>|t|']].copy()
        # White's (1980) heteroskedasticity robust standard errors.
        # Defined as
        #   sqrt(diag(X.T X)^(-1)X.T diag(e_i^(2)) X(X.T X)^(-1)
        # where e_i = resid[i].
        # HC0_se is a property. It is not evaluated until it is called.
        # When it is called the RegressionResults instance will then have
        # another attribute cov_HC0, which is the full heteroskedasticity
        # consistent covariance matrix and also het_scale, which is in
        # this case just resid**2. HCCM matrices are only appropriate for
        # OLS.
        # Note: Delete the following two assignments for 'regular'
        # standard errors.
        estimates['Std.Err.'] = results.HC0_se
        # NOTE(review): the t distribution uses nobs - 1 degrees of
        # freedom here; results.df_resid may be what is intended --
        # confirm before changing, as it alters the reported p-values.
        estimates['P>|t|'] = stats.t.sf(
            np.abs(estimates['Coef.'] / estimates['Std.Err.']),
            results.nobs - 1) * 2
        fit_measure = results.rsquared_adj
    elif reg_type == 'probit':
        # Separate name so the ``model`` argument is not shadowed.
        probit_model = sm.Probit(y, x)
        results = probit_model.fit()
        margeffs = results.get_margeff()
        # exog_names[1:] skips the first regressor
        # (presumably the intercept -- TODO confirm).
        estimates = pd.DataFrame(
            [margeffs.margeff, margeffs.margeff_se, margeffs.pvalues],
            index=['Coef.', 'Std.Err.', 'P>|t|'],
            columns=probit_model.exog_names[1:]).T
        fit_measure = results.prsquared
    else:
        # Previously an unknown reg_type only surfaced later as an
        # unbound-variable error.
        raise ValueError(
            "reg_type must be 'ols' or 'probit', got %r" % (reg_type,))

    # Format every value with three decimals. Assigning column by column
    # keeps the regressor names as the row index.
    for col in estimates.columns:
        estimates[col] = ['{0:0.3f}'.format(value) for value in estimates[col]]
    estimates['Std.Err.'] = ['(' + se + ')' for se in estimates['Std.Err.']]

    # Significance markers are derived from the rounded p-values and are
    # assigned as a whole column (no chained ``.iloc`` assignment).
    estimates['Coef.'] = [
        coef + _significance_suffix(float(pvalue))
        for coef, pvalue in zip(estimates['Coef.'], estimates['P>|t|'])
    ]

    # Instead of inserting lines, just replace pvalues by linespace.
    estimates['P>|t|'] = ''
    estimates = estimates.rename(columns={'P>|t|': 'addlinespace'})

    stacked_estimates = pd.DataFrame(
        estimates.stack(), columns=[model_name])
    stacked_model_stats = pd.DataFrame(
        [results.nobs, fit_measure],
        index=['Observations', 'R2'],
        columns=[model_name])

    # DataFrame.append was removed in pandas 2.0; concat is the
    # equivalent row-wise combination.
    stacked_model = pd.concat([stacked_estimates, stacked_model_stats])
    return stacked_model