def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    '''Generate the top table(s) of a results summary.

    Parameters
    ----------
    results : results instance
        ``results.model`` is read for the model class name. ``nobs``,
        ``df_model``, ``df_resid`` and ``llf`` are read lazily, only for rows
        whose value is None and whose label needs them.
    title : str or None
        Table title. If None, the model class name followed by
        ``' Regression Results'`` is used.
    gleft : list of (label, value) tuples or None
        Rows of the left column. A value of None is replaced by the built-in
        default for that label. If ``gleft`` is None a default left column is
        used and no right column is produced.
    gright : list of (label, value) tuples or None
        Rows of the right column (only used when ``gleft`` is given). None is
        treated as "no right column".
    yname, xname : optional
        Forwarded to ``_getnames``.

    Returns
    -------
    general_table : SimpleTable
        The left table, extended on the right by the right table if there
        is one.

    Raises
    ------
    KeyError
        If a row has value None and its label has no built-in default.

    TODO: this still uses predefined model_methods ?
    allow gleft, gright to be 1 element tuples instead of filling with None?
    '''
    import time

    # time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are lambdas because some attributes raise if they are not
    # available for a given model; they are only evaluated on demand.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: [d_or_f(results.nobs)],
        'Df Model:': lambda: [d_or_f(results.df_model)],
        'Df Residuals:': lambda: [d_or_f(results.df_resid)],
        # llf does not exist for every model (e.g. RLM) - raises then
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    gen_left, gen_right = gleft, gright
    if gen_left is None:
        # Default general part of the summary table. Every label used here
        # must be a key of ``default_items``, otherwise the lookup below
        # raises KeyError.
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:
            # best effort: models without a log-likelihood just omit the row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        gen_right = []
    elif gen_right is None:
        gen_right = []

    gen_title = title
    gen_header = None

    def fill_defaults(rows):
        # replace missing (None) values with defaults; let KeyErrors raise
        return [(item, default_items[item]() if value is None else value)
                for item, value in rows]

    gen_left = fill_defaults(gen_left)
    gen_right = fill_defaults(gen_right)

    # sanity check: nothing may be left unfilled
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    if gen_right:
        # pad both tables with blank lines to an equal number of rows
        if len(gen_right) < len(gen_left):
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work as wanted here, so force extra
        # spacing and an exact string length in the right table's stubs
        gen_right = [('%-21s' % (' '+k), v) for k, v in gen_right]
        # transpose rows and columns
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # .extend_right is called with [] in this case

    # Built after the padding step so the left table matches the right one.
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left
    return general_table  # , gen_table_left, gen_table_right
## print 'llf: ', res.llf ## print 'R2 ', res.rsquared ## print 'R2 adj', res.rsquared_adj prederr2 = 0. for inidx, outidx in LeaveOneOut(len(y0)): resl1o = sm.OLS(y0[inidx], fact_wconst[inidx,:]).fit() #print data.endog[outidx], res.model.predict(data.exog[outidx,:]), prederr2 += (y0[outidx] - resl1o.predict(fact_wconst[outidx,:]))**2. results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2]) results = np.array(results) print(results) print('best result for k, by AIC, BIC, R2_adj, L1O') print(np.r_[(np.argmin(results[:,1:3],0), np.argmax(results[:,3],0), np.argmin(results[:,-1],0))]) from statsmodels.iolib.table import SimpleTable headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ') numformat = ['%6d'] + ['%10.3f']*4 #'%10.4f' txt_fmt1 = dict(data_fmts = numformat) tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1) print("PCA regression on simulated data,") print("DGP: 2 factors and 4 explanatory variables") print(tabl) print("Notes: k is number of components of PCA,") print(" constant is added additionally") print(" k=0 means regression on constant only") print(" L1O: sum of squared prediction errors for leave-one-out")
def add_table(self, res, header, index, title):
    """Wrap the arguments in a ``SimpleTable`` and store it on the instance.

    ``res``, ``header``, ``index`` and ``title`` are passed positionally, in
    that order, to ``SimpleTable``; the new table is appended to
    ``self.tables``.
    """
    new_table = SimpleTable(res, header, index, title)
    self.tables.append(new_table)
title_align='r', header_align='r', data_aligns="r", stubs_align="l", fmt='txt') cell0data = 0.0000 cell1data = 1 row0data = [cell0data, cell1data] row1data = [2, 3.333] table1data = [row0data, row1data] test1stubs = ('stub1', 'stub2') test1header = ('header1', 'header2') #test1header = ('header1\nheader1a', 'header2\nheader2a') tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1, ltx_fmt=ltx_fmt1, html_fmt=html_fmt1) def custom_labeller(cell): if cell.data is np.nan: return 'missing' class TestCell: def test_celldata(self): celldata = cell0data, cell1data, row1data[0], row1data[1] cells = [ Cell(datum, datatype=i % 2) for i, datum in enumerate(celldata) ]
def summary(self):
    """
    Summarize the fitted Model

    Returns
    -------
    smry : Summary instance
        Holds a two-column header table (model description on the left, fit
        statistics on the right) followed by a table with one row per entry
        of ``self.params_formatted``. It can be printed or converted to
        various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    title = model.__class__.__name__ + ' Model Results'

    # Name of the dependent variable: taken from pandas metadata if present
    endog = self.model.endog
    if isinstance(endog, pd.DataFrame):
        dep_variable = endog.columns[0]
    elif isinstance(endog, pd.Series):
        dep_variable = endog.name
    else:
        dep_variable = 'endog'

    seasonal_periods = None
    if self.model.seasonal is not None:
        seasonal_periods = self.model.seasonal_periods

    lookup = {
        'add': 'Additive',
        'additive': 'Additive',
        'mul': 'Multiplicative',
        'multiplicative': 'Multiplicative',
        None: 'None'
    }

    transform = self.params['use_boxcox']
    box_cox_transform = bool(transform)
    # A string-valued ``use_boxcox`` is shown as is; otherwise show ``lamda``
    if isinstance(transform, string_types):
        box_cox_coeff = transform
    else:
        box_cox_coeff = self.params['lamda']
    if isinstance(box_cox_coeff, float):
        box_cox_coeff = '{:>10.5f}'.format(box_cox_coeff)

    top_left = [
        ('Dep. Variable:', [dep_variable]),
        ('Model:', [model.__class__.__name__]),
        ('Optimized:', [str(np.any(self.optimized))]),
        ('Trend:', [lookup[self.model.trend]]),
        ('Seasonal:', [lookup[self.model.seasonal]]),
        ('Seasonal Periods:', [str(seasonal_periods)]),
        ('Box-Cox:', [str(box_cox_transform)]),
        ('Box-Cox Coeff.:', [str(box_cox_coeff)]),
    ]
    top_right = [
        ('No. Observations:', [str(len(self.model.endog))]),
        ('SSE', ['{:5.3f}'.format(self.sse)]),
        ('AIC', ['{:5.3f}'.format(self.aic)]),
        ('BIC', ['{:5.3f}'.format(self.bic)]),
        ('AICC', ['{:5.3f}'.format(self.aicc)]),
        ('Date:', None),
        ('Time:', None),
    ]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    formatted = self.params_formatted  # type: pd.DataFrame

    def _fmt(x):
        # Fixed-point with up to 7 decimals for moderate magnitudes,
        # general ('g') format for very large or very small values.
        magnitude = np.abs(x)
        scale = int(np.log10(magnitude)) if magnitude != 0 else 1
        if not -3 <= scale <= 4:
            return '{:>20.5g}'.format(x)
        template = '{{:>20.{0}f}}'.format(min(7 - scale, 7))
        return template.format(x)

    # Columns are read positionally from each row of ``formatted``:
    # position 1 -> 'coeff', position 0 -> 'code', position 2 -> 'optimized'
    tab = [
        [
            _fmt(vals.iloc[1]),
            '{0:>20}'.format(vals.iloc[0]),
            '{0:>20}'.format(str(bool(vals.iloc[2]))),
        ]
        for _, vals in formatted.iterrows()
    ]
    params_table = SimpleTable(tab, headers=['coeff', 'code', 'optimized'],
                               title="",
                               stubs=list(formatted.index))

    smry.tables.append(params_table)
    return smry
def summary_table(title=None, dep_var='', model_name='', method='', date='',
                  time='', aic=None, bic=None, num_obs=None, df_resid=None,
                  df_model=None, rho_squared=None, rho_bar_squared=None,
                  log_likelihood=None, null_log_likelihood=None,
                  x_names=None, coefs=None, std_errs=None, t_scores=None,
                  alpha=None):
    """
    Generate a summary table of estimation results using Statsmodels
    SimpleTable. Still a work in progress.

    SimpleTable is maddening to work with, so it would be nice to find an
    alternative. It would need to support pretty-printing of formatted tables
    to plaintext and ideally also to HTML and Latex.

    At first it looked like we could use Statsmodels's summary table generator
    directly (iolib.summary.Summary), but this requires a Statsmodels results
    object as input and doesn't document which properties are pulled from it.
    PyLogit reverse engineered this for use in get_statsmodels_summary() -- so
    it's possible, but could be hard to maintain in the long run.

    We can't use PyLogit's summary table generator either. It requires a
    PyLogit model class as input, and we can't create one from results
    parameters. Oh well!

    Parameters
    ----------
    title : str, optional
        Title of the header table; defaults to
        "CHOICEMODELS ESTIMATION RESULTS".
    dep_var, model_name, method, date, time : str, optional
        Shown verbatim in the left half of the header table.
    aic, bic, num_obs, df_resid, df_model, rho_squared, rho_bar_squared,
    log_likelihood, null_log_likelihood : numeric, optional
        Header statistics; a value of None is rendered as an empty cell.
    x_names : sequence of str, optional
        Row labels of the body table; one body row is produced per name.
    coefs, std_errs, t_scores : sequences, optional
        Per-parameter values, indexed in parallel with ``x_names``. Each must
        have at least ``len(x_names)`` entries; individual entries may be
        None.
    alpha : optional
        Currently unused.

    Returns
    -------
    (header, body) : tuple of SimpleTable

    """
    # None defaults instead of shared mutable list defaults
    x_names = [] if x_names is None else x_names
    coefs = [] if coefs is None else coefs
    std_errs = [] if std_errs is None else std_errs
    t_scores = [] if t_scores is None else t_scores

    def fmt(value, format_str):
        # Custom numeric->string formatter that gracefully accepts null values
        return '' if value is None else format_str.format(value)

    if title is None:
        title = "CHOICEMODELS ESTIMATION RESULTS"

    top_left = [['Dep. Var.:', dep_var],
                ['Model:', model_name],
                ['Method:', method],
                ['Date:', date],
                ['Time:', time],
                ['AIC:', fmt(aic, "{:,.3f}")],
                ['BIC:', fmt(bic, "{:,.3f}")]]

    top_right = [['No. Observations:', fmt(num_obs, "{:,}")],
                 ['Df Residuals:', fmt(df_resid, "{:,}")],
                 ['Df Model:', fmt(df_model, "{:,}")],
                 ['Pseudo R-squ.:', fmt(rho_squared, "{:.3f}")],
                 ['Pseudo R-bar-squ.:', fmt(rho_bar_squared, "{:.3f}")],
                 ['Log-Likelihood:', fmt(log_likelihood, "{:,.3f}")],
                 ['LL-Null:', fmt(null_log_likelihood, "{:,.3f}")]]

    # Zip into a single table (both sides have the same number of entries)
    header_cells = [left + right for left, right in zip(top_left, top_right)]

    # See end of statsmodels.iolib.table.py for formatting options
    header_fmt = dict(table_dec_below='',
                      data_aligns='lrlr',
                      colwidths=11,
                      colsep=' ',
                      empty_cell='')

    header = SimpleTable(header_cells, title=title, txt_fmt=header_fmt)

    col_labels = ['coef', 'std err', 'z', 'P>|z|', 'Conf. Int.']
    row_labels = x_names

    body_cells = [
        [
            fmt(coefs[i], "{:,.4f}"),
            fmt(std_errs[i], "{:,.3f}"),
            fmt(t_scores[i], "{:,.3f}"),
            '',  # p-value placeholder
            '',  # conf int placeholder
        ]
        for i in range(len(x_names))
    ]

    body_fmt = dict(table_dec_below='=',
                    header_align='r',
                    data_aligns='r',
                    colwidths=7,
                    colsep=' ')

    body = SimpleTable(body_cells,
                       headers=col_labels,
                       stubs=row_labels,
                       txt_fmt=body_fmt)

    # Ideally we'd want to append these into a single table, but I can't get
    # it to work without completely messing up the formatting..
    return (header, body)
def summary(self):
    """
    Constructs a summary of the results from a fit model.

    The summary consists of a two-column header table followed by one
    coefficient table per non-empty parameter group (mean model, volatility
    model, distribution) and a note that the parameters were user-specified.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitated export to text, html or
        latex
    """
    from copy import deepcopy

    # Summary layout
    # 1. Overall information
    # 2. Mean parameters
    # 3. Volatility parameters
    # 4. Distribution parameters
    # 5. Notes

    model = self.model
    model_name = model.name + ' - ' + model.volatility.name

    # Summary Header
    top_left = [('Dep. Variable:', self._dep_name),
                ('Mean Model:', model.name),
                ('Vol Model:', model.volatility.name),
                ('Distribution:', model.distribution.name),
                ('Method:', 'User-specified Parameters'),
                ('', ''),
                ('Date:', self._datetime.strftime('%a, %b %d %Y')),
                ('Time:', self._datetime.strftime('%H:%M:%S'))]

    top_right = [
        ('R-squared:', '--'),
        ('Adj. R-squared:', '--'),
        ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
        ('AIC:', '%#10.6g' % self.aic),
        ('BIC:', '%#10.6g' % self.bic),
        ('No. Observations:', self._nobs),
        ('', ''),
        ('', ''),
    ]

    title = model_name + ' Model Results'

    # fmt_2cols is a shared object defined outside this method. Apply the
    # '%18s' override to a private deep copy so that other users of
    # fmt_2cols are not affected by building this summary.
    fmt = deepcopy(fmt_2cols)
    fmt['data_fmts'][1] = '%18s'

    stubs = [stub for stub, _ in top_left]
    vals = [[val] for _, val in top_left]
    table = SimpleTable(vals, txt_fmt=fmt, title=title, stubs=stubs)

    # create summary table instance
    smry = Summary()

    # Top Table: pad the right-hand labels to a fixed width and attach them
    top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
    stubs = [stub for stub, _ in top_right]
    vals = [[val] for _, val in top_right]
    table.extend_right(SimpleTable(vals, stubs=stubs))
    smry.tables.append(table)

    # Parameter table: only coefficients, since nothing was estimated
    stubs = self._names
    header = ['coef']
    vals = (self.params, )
    formats = [(10, 4)]
    param_table_data = []
    for pos in range(len(vals[0])):
        row = []
        for i, val in enumerate(vals):
            if isinstance(val[pos], np.float64):
                converted = format_float_fixed(val[pos], *formats[i])
            else:
                converted = val[pos]
            row.append(converted)
        param_table_data.append(row)

    mc = self.model.num_params
    vc = self.model.volatility.num_params
    dc = self.model.distribution.num_params
    counts = (mc, vc, dc)
    titles = ('Mean Model', 'Volatility Model', 'Distribution')
    total = 0
    # Slice the flat parameter list into consecutive per-group tables
    for section_title, count in zip(titles, counts):
        if count == 0:
            continue

        table_data = param_table_data[total:total + count]
        table_stubs = stubs[total:total + count]
        total += count
        table = SimpleTable(table_data,
                            stubs=table_stubs,
                            txt_fmt=fmt_params,
                            headers=header, title=section_title)
        smry.tables.append(table)

    extra_text = ('Results generated with user-specified parameters.',
                  'Since the model was not estimated, there are no std. '
                  'errors.')
    smry.add_extra_txt(extra_text)
    return smry
def summary(self, evaluator):
    """Return the result of ``self.summary_data(evaluator)`` as a table.

    The table is titled "Volatility Cone", with column headers
    30/60/90/120 and row stubs max, 75%, median, 25%, min.
    """
    column_headers = ['30', '60', '90', '120']
    row_stubs = ['max', '75%', 'median', '25%', 'min']
    return SimpleTable(self.summary_data(evaluator), column_headers,
                       row_stubs, title="Volatility Cone")
def summary_impacts(self, impact_date=None, impacted_variable=None,
                    groupby='impact date', show_revisions=None,
                    sparsify=True, float_format='%.2f'):
    """
    Create summary table with detailed impacts from news; by date, variable

    Parameters
    ----------
    impact_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels specifying particular
        impact periods to display. The impact date(s) describe the periods
        in which impacted variables were *affected* by the news. If this
        argument is given, the output table will only show this impact date
        or dates. Note that this argument is passed to the Pandas `loc`
        accessor, and so it should correspond to the labels of the model's
        index. If the model was created with data in a list or numpy array,
        then these labels will be zero-indexed observation integers.
    impacted_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        impacted variables to display. The impacted variable(s) describe the
        variables that were *affected* by the news. If you do not know the
        labels for the variables, check the `endog_names` attribute of the
        model instance.
    groupby : {impact date, impacted variable}
        The primary variable for grouping results in the impacts table. The
        default is to group by impact date. Matching is case-insensitive
        and underscores may be used in place of spaces.
    show_revisions : bool, optional
        If set to False, the impacts table will not show the impacts from
        data revisions or the total impacts. Default is to show the
        revisions and totals columns if any revisions were made and
        otherwise to hide them.
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    impacts_table : SimpleTable
        Table describing total impacts from both revisions and news. See
        the documentation for the `impacts` attribute for more details
        about the index and columns.

    Raises
    ------
    ValueError
        If `groupby` is not one of the valid options.

    See Also
    --------
    impacts
    """
    # Squeeze for univariate models
    if impacted_variable is None and self.updated.model.k_endog == 1:
        impacted_variable = self.updated.model.endog_names

    # Default is to only show the revisions columns if there were any
    # revisions (otherwise it would just be a column of zeros)
    if show_revisions is None:
        show_revisions = len(self.revisions_iloc) > 0

    # Select only the variables / dates of interest
    s = list(np.s_[:, :])
    if impact_date is not None:
        s[0] = np.s_[impact_date]
    if impacted_variable is not None:
        s[1] = np.s_[impacted_variable]
    s = tuple(s)
    impacts = self.impacts.loc[s, :]

    # Make the first index level the groupby level. Normalize underscore
    # spellings to the space-separated form so that the `groupby in impacts`
    # column lookup below also works for them.
    groupby = groupby.lower().replace('_', ' ')
    if groupby == 'impacted variable':
        impacts.index = impacts.index.swaplevel(1, 0)
    elif groupby != 'impact date':
        raise ValueError('Invalid groupby for impacts table. Valid options'
                         ' are "impact date" or "impacted variable".'
                         f' Got "{groupby}".')
    impacts = impacts.sort_index()

    # Drop the non-groupby level if there's only one value
    k_vars = len(impacts.index.remove_unused_levels().levels[1])
    removed_level = None
    if sparsify and k_vars == 1:
        name = impacts.index.names[1]
        value = impacts.index.levels[1][0]
        removed_level = f'{name} = {value}'
        impacts.index = impacts.index.droplevel(1)
        impacts = impacts.applymap(
            lambda num: '' if pd.isnull(num) else float_format % num)
        impacts = impacts.reset_index()
        impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str)
    else:
        impacts = impacts.reset_index()
        impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str)
        impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap(
            lambda num: '' if pd.isnull(num) else float_format % num)

    # Sparsify the groupby column: blank out values repeated from the
    # previous row
    if sparsify and groupby in impacts:
        mask = impacts[groupby] == impacts[groupby].shift(1)
        impacts.loc[mask, groupby] = ''

    # Drop revisions and totals columns if applicable
    if not show_revisions:
        impacts.drop(['impact of revisions', 'total impact'], axis=1,
                     inplace=True)

    params_data = impacts.values
    params_header = impacts.columns.tolist()
    params_stubs = None

    title = 'Impacts'
    if removed_level is not None:
        # NOTE(review): groupby is never 'date' at this point, so the join
        # word is always 'for'. Kept as is to preserve the existing title
        # text; confirm whether 'impact date' was intended.
        join = 'on' if groupby == 'date' else 'for'
        title += f' {join} [{removed_level}]'
    impacts_table = SimpleTable(params_data, params_header, params_stubs,
                                txt_fmt=fmt_params, title=title)

    return impacts_table
# Estimate an OLS model for comparison
res_ols = sm.OLS(y, X).fit()

# Compare the estimated parameters in WLS and OLS
print(res_ols.params)
print(res_wls.params)

# Compare the WLS standard errors to heteroscedasticity corrected OLS standard
# errors:
se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se],
                [res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])
se = np.round(se, 4)
colnames = ['x1', 'const']
# One label per row of `se`, in the same order as the rows stacked above
rownames = ['WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC2', 'OLS_HC3']
tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
print(tabl)

# Calculate OLS prediction interval
covb = res_ols.cov_params()
prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)
prediction_std = np.sqrt(prediction_var)
tppf = stats.t.ppf(0.975, res_ols.df_resid)

# Draw a plot to compare predicted values in WLS and OLS:
prstd, iv_l, iv_u = wls_prediction_std(res_wls)
plt.figure()
plt.plot(x, y, 'o', x, y_true, 'b-')
plt.plot(x, res_ols.fittedvalues, 'r--')
plt.plot(x, res_ols.fittedvalues + tppf * prediction_std, 'r--')
plt.plot(x, res_ols.fittedvalues - tppf * prediction_std, 'r--')
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues

    Parameters
    ----------
    result : result instance
        the result instance with a 2-dimensional ``params`` array and the
        attributes named in ``extras``
    extras : list[str]
        additional attributes to add below a parameter row, e.g. bse or
        tvalues. Each is formatted in parentheses.
    endog_names : {list[str], None}
        names for rows of the parameter array (multivariate endog). If None,
        names are derived from the unique values of ``result.model.endog``,
        skipping the first one.
    exog_names : {list[str], None}
        names for columns of the parameter array (exog). If None, ``var0``,
        ``var1``, ... are used, one per column.
    title : None or string

    Returns
    -------
    table : SimpleTable
        one table in which, for each row of the parameter array, the
        parameter row is followed by one row per entry of ``extras``

    '''
    if endog_names is None:
        # TODO: note the [1:] is specific to current MNLogit
        endog_names = ['endog_%d' % i
                       for i in np.unique(result.model.endog)[1:]]
    if exog_names is None:
        # exog_names label the columns, so count columns rather than rows
        exog_names = ['var%d' % i for i in range(result.params.shape[1])]

    # TODO: check formatting options with different values
    res_params = [[forg(item, prec=4) for item in row]
                  for row in result.params]
    if extras:
        extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                         for v in col]
                        for col in getattr(result, what)]
                       for what in extras]
        # interleave: params row, then the matching row of each extra
        data = lzip(res_params, *extras_list)
        data = [i for j in data for i in j]  # flatten
        # only the parameter row carries a stub; extra rows get ''
        stubs = lzip(endog_names, *[[''] * len(endog_names)] * len(extras))
        stubs = [i for j in stubs for i in j]  # flatten
    else:
        data = res_params
        stubs = endog_names

    # cells are already strings, so every column uses a plain '%s' format
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt["data_fmts"] = ["%s"] * result.params.shape[1]

    return SimpleTable(data, headers=exog_names,
                       stubs=stubs,
                       title=title,
                       txt_fmt=txt_fmt)
def test_SimpleTable_4(self):
    """Build a 2x2 ``SimpleTable`` with a custom text format.

    The table has stubs, headers, a custom ``txt_fmt`` and copies of the
    default latex / html formats. Three nested helpers compare its text,
    latex and html renderings with expected strings.
    """
    # Basic test, test_SimpleTable_4 test uses custom txt_fmt
    txt_fmt1 = dict(data_fmts=['%3.2f', '%d'], empty_cell=' ', colwidths=1, colsep=' * ', row_pre='* ', row_post=' *', table_dec_above='*', table_dec_below='*', header_dec_below='*', header_fmt='%s', stub_fmt='%s', title_align='r', header_align='r', data_aligns="r", stubs_align="l", fmt='txt')
    ltx_fmt1 = default_latex_fmt.copy()
    html_fmt1 = default_html_fmt.copy()
    cell0data = 0.0000
    cell1data = 1
    row0data = [cell0data, cell1data]
    row1data = [2, 3.333]
    table1data = [row0data, row1data]
    test1stubs = ('stub1', 'stub2')
    test1header = ('header1', 'header2')
    tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1, ltx_fmt=ltx_fmt1, html_fmt=html_fmt1)

    # NOTE(review): the helpers below read the local ``tbl``, so they are
    # nested in this method. No call to them is visible here, so as shown
    # their assertions never run - confirm against the full file.
    def test_txt_fmt1(self):
        # Limited test of custom txt_fmt
        desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * 0.00 * 1 * * stub2 * 2.00 * 3 * ***************************** """
        actual = '\n%s\n' % tbl.as_text()
        #print(actual)
        #print(desired)
        self.assertEqual(actual, desired)

    def test_ltx_fmt1(self):
        # Limited test of custom ltx_fmt
        desired = r""" \begin{tabular}{lcc} \toprule & \textbf{header1} & \textbf{header2} \\ \midrule \textbf{stub1} & 0.0 & 1 \\ \textbf{stub2} & 2 & 3.333 \\ \bottomrule \end{tabular} """
        actual = '\n%s\n' % tbl.as_latex_tabular()
        #print(actual)
        #print(desired)
        self.assertEqual(actual, desired)

    def test_html_fmt1(self):
        # Limited test of custom html_fmt
        desired = """ <table class="simpletable"> <tr> <td></td> <th>header1</th> <th>header2</th> </tr> <tr> <th>stub1</th> <td>0.0</td> <td>1</td> </tr> <tr> <th>stub2</th> <td>2</td> <td>3.333</td> </tr> </table> """
        actual = '\n%s\n' % tbl.as_html()
        self.assertEqual(actual, desired)
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False):
    '''create a summary table for the parameters

    Parameters
    ----------
    results : results instance or tuple
        Either a results instance, from which ``params``, ``bse``,
        ``tvalues``, ``pvalues`` and ``conf_int(alpha)`` are read, or a
        6-tuple ``(results, params, std_err, tvalues, pvalues, conf_int)``
        with these values precomputed (used for multivariate endog).
    yname : string or None
        optional name for the endogenous variable, forwarded to
        ``_getnames``
    xname : list of strings or None
        optional names for the exogenous variables, forwarded to
        ``_getnames``
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        indicator whether the p-values are based on the Student-t
        distribution (if True) or on the normal distribution (if False)
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.

    Returns
    -------
    params_table : SimpleTable instance
    '''
    # Parameters part of the summary table
    # ------------------------------------
    # Note: this is not necessary since we standardized names, only t versus
    # normal

    if isinstance(results, tuple):
        # for multivariate endog
        # TODO: check whether I don't want to refactor this
        # we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  # is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    # Header names for the parameter part of the summary table
    alp = str((1-alpha)*100)+'%'
    if use_t:
        param_header = ['coef', 'std err', 't', 'P>|t|',
                        '[' + alp + ' Conf. Int.]']
    else:
        param_header = ['coef', 'std err', 'z', 'P>|z|',
                        '[' + alp + ' Conf. Int.]']

    if skip_header:
        param_header = None

    _, xname = _getnames(results, yname=yname, xname=xname)

    params_stubs = xname
    # ``range`` (not ``xrange``) so the code runs on Python 2 and 3
    exog_idx = range(len(xname))

    confint = ["%s %s" % tuple(map(forg, conf_int[i])) for i in exog_idx]
    # center confidence intervals if they are unequal lengths; a real list
    # is needed because it is scanned twice, and it may be empty
    len_ci = [len(ci) for ci in confint]
    if len_ci:
        max_ci = max(len_ci)
        if min(len_ci) < max_ci:
            confint = [ci.center(max_ci) for ci in confint]

    # explicit f/g formatting via forg, f or g depending on values;
    # materialized as a list so the rows can be indexed and re-iterated
    params_data = list(zip([forg(params[i], prec=4) for i in exog_idx],
                           [forg(std_err[i]) for i in exog_idx],
                           [forg(tvalues[i]) for i in exog_idx],
                           ["%#6.3f" % (pvalues[i]) for i in exog_idx],
                           confint))
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_params)

    return parameter_table
def summary_table(res, alpha=0.05):
    """
    Generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
        Fitted regression results; ``fittedvalues``, ``resid``,
        ``mse_resid``, ``df_resid``, ``nobs`` and ``model.endog`` are read
        from it, and it is passed to ``OLSInfluence`` and
        ``wls_prediction_std``.
    alpha : float
        significance level for confidence interval

    Returns
    -------
    st : SimpleTable instance
        table with results that can be printed
    data : ndarray
        calculated measures and statistics for the table
    ss2 : list of strings
        column_names for table (Note: rows of table are observations). The
        confidence level shown in the interval columns follows ``alpha``.
    """
    from copy import deepcopy

    from scipy import stats
    from statsmodels.iolib.table import SimpleTable, default_html_fmt
    from statsmodels.iolib.tableformatting import fmt_base
    from statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = OLSInfluence(res)

    # standard error for predicted mean
    # Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag*res.mse_resid)

    tppf = stats.t.isf(alpha/2., res.df_resid)
    predict_mean_ci = np.column_stack([
        res.fittedvalues - tppf * predict_mean_se,
        res.fittedvalues + tppf * predict_mean_se])

    # standard error for predicted observation
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(
        res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    # standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    data = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci[:, 0],
        predict_mean_ci[:, 1],
        predict_ci[:, 0],
        predict_ci[:, 1],
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance[0]
    ])

    # Label the interval columns with the level actually used
    # ('95' for the default alpha=0.05) instead of a hard-coded 95%.
    level = '%g' % ((1 - alpha) * 100)
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict',
           'Mean ci\n' + level + '% low', 'Mean ci\n' + level + '% upp',
           'Predict ci\n' + level + '% low',
           'Predict ci\n' + level + '% upp',
           'Residual', 'Std Error\nResidual', 'Student\nResidual',
           "Cook's\nD"]

    # copies, so the shared default formats are not modified
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    # first column is the observation number, the rest are floats
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)

    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False, title=None):
    '''create a summary table for the parameters

    Parameters
    ----------
    results : results instance or tuple
        Either a results instance, from which ``params``, ``bse``,
        ``tvalues``, ``pvalues`` and ``conf_int(alpha)`` are read, or a
        6-tuple ``(results, params, std_err, tvalues, pvalues, conf_int)``
        with these values precomputed (used for multivariate endog).
    yname : string or None
        optional name for the endogenous variable, forwarded to
        ``_getnames``
    xname : list of strings or None
        optional names for the exogenous variables, forwarded to
        ``_getnames``
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        indicator whether the p-values are based on the Student-t
        distribution (if True) or on the normal distribution (if False)
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.
    title : string or None
        title passed on to the table

    Returns
    -------
    params_table : SimpleTable instance

    Raises
    ------
    ValueError
        If the number of names does not match the number of parameters.
    '''
    if isinstance(results, tuple):
        # multivariate endog: statistics are supplied by the caller
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    # Only the test-statistic labels depend on t versus normal
    stat_labels = ['t', 'P>|t|'] if use_t else ['z', 'P>|z|']
    param_header = (['coef', 'std err'] + stat_labels
                    + ['[' + str(alpha/2), str(1-alpha/2) + ']'])
    if skip_header:
        param_header = None

    _, xname = _getnames(results, yname=yname, xname=xname)
    if len(xname) != len(params):
        raise ValueError('xnames and params do not have the same length')

    row_ids = lrange(len(xname))

    # one formatted column at a time, then transpose into rows
    coef_col = [forg(params[i], prec=4) for i in row_ids]
    se_col = [forg(std_err[i]) for i in row_ids]
    stat_col = [forg(tvalues[i]) for i in row_ids]
    pval_col = ["%#6.3f" % (pvalues[i]) for i in row_ids]
    ci_low_col = [forg(conf_int[i, 0]) for i in row_ids]
    ci_upp_col = [forg(conf_int[i, 1]) for i in row_ids]
    table_rows = lzip(coef_col, se_col, stat_col, pval_col,
                      ci_low_col, ci_upp_col)

    return SimpleTable(table_rows,
                       param_header,
                       xname,
                       title=title,
                       txt_fmt=fmt_params)
def summary(self):
    """
    Constructs a summary of the results from a fit model.

    The summary consists of a two-column header table, one parameter table
    per non-empty parameter group (mean model, volatility model,
    distribution), and extra text naming the covariance estimator plus a
    warning when ``self.convergence_flag`` is set.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitated export to text, html or
        latex
    """
    from copy import deepcopy

    # Summary layout
    # 1. Overall information
    # 2. Mean parameters
    # 3. Volatility parameters
    # 4. Distribution parameters
    # 5. Notes

    model = self.model
    model_name = model.name + ' - ' + model.volatility.name

    # Summary Header
    top_left = [('Dep. Variable:', self._dep_name),
                ('Mean Model:', model.name),
                ('Vol Model:', model.volatility.name),
                ('Distribution:', model.distribution.name),
                ('Method:', 'Maximum Likelihood'),
                ('', ''),
                ('Date:', self._datetime.strftime('%a, %b %d %Y')),
                ('Time:', self._datetime.strftime('%H:%M:%S'))]

    top_right = [('R-squared:', '%#8.3f' % self.rsquared),
                 ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj),
                 ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                 ('AIC:', '%#10.6g' % self.aic),
                 ('BIC:', '%#10.6g' % self.bic),
                 ('No. Observations:', self._nobs),
                 ('Df Residuals:', self.nobs - self.num_params),
                 ('Df Model:', self.num_params)]

    title = model_name + ' Model Results'

    # fmt_2cols is a shared object defined outside this method. Apply the
    # '%18s' override to a private deep copy so that other users of
    # fmt_2cols are not affected by building this summary.
    fmt = deepcopy(fmt_2cols)
    fmt['data_fmts'][1] = '%18s'

    stubs = [stub for stub, _ in top_left]
    vals = [[val] for _, val in top_left]
    table = SimpleTable(vals, txt_fmt=fmt, title=title, stubs=stubs)

    # create summary table instance
    smry = Summary()

    # Top Table: pad the right-hand labels to a fixed width and attach them
    top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
    stubs = [stub for stub, _ in top_right]
    vals = [[val] for _, val in top_right]
    table.extend_right(SimpleTable(vals, stubs=stubs))
    smry.tables.append(table)

    # Parameter table
    conf_int = np.asarray(self.conf_int())
    conf_int_str = ['[' + format_float_fixed(c[0], 7, 3) + ','
                    + format_float_fixed(c[1], 7, 3) + ']'
                    for c in conf_int]

    stubs = self._names
    header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.']
    vals = (self.params,
            self.std_err,
            self.tvalues,
            self.pvalues,
            conf_int_str)
    # (width, precision) per column; the interval column is already text
    formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]
    param_table_data = []
    for pos in range(len(vals[0])):
        row = []
        for i, val in enumerate(vals):
            if isinstance(val[pos], np.float64):
                converted = format_float_fixed(val[pos], *formats[i])
            else:
                converted = val[pos]
            row.append(converted)
        param_table_data.append(row)

    mc = self.model.num_params
    vc = self.model.volatility.num_params
    dc = self.model.distribution.num_params
    counts = (mc, vc, dc)
    titles = ('Mean Model', 'Volatility Model', 'Distribution')
    total = 0
    # Slice the flat parameter list into consecutive per-group tables
    for section_title, count in zip(titles, counts):
        if count == 0:
            continue

        table_data = param_table_data[total:total + count]
        table_stubs = stubs[total:total + count]
        total += count
        table = SimpleTable(table_data,
                            stubs=table_stubs,
                            txt_fmt=fmt_params,
                            headers=header, title=section_title)
        smry.tables.append(table)

    extra_text = ['Covariance estimator: ' + self.cov_type]

    if self.convergence_flag:
        extra_text.append(""" WARNING: The optimizer did not indicate sucessful convergence. The message was {string_message}. See convergence_flag.""".format(
            string_message=self._optim_output[-1]))

    smry.add_extra_txt(extra_text)
    return smry
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Summarize the fit of a model as a general table and a parameter table.

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable. Defaults to
        ``self.model.endog_names`` and to ``'y'`` if the model does not
        provide that attribute.
    xname : list of str, optional
        Names of the regressors. Defaults to ``self.model.exog_names`` and
        to ``var_#`` for # in the number of parameters if the model does
        not provide that attribute.
    title : str, optional
        Title of the general table. The default ``0`` selects a description
        looked up from the model class name; classes without a listed
        description use the class name itself.
    alpha : float, optional
        Passed to ``self.conf_int``; the confidence interval column is
        labelled with ``(1 - alpha) * 100`` percent.
    returns : str, optional
        'text' or 'print' return the summary as a string. 'table', 'csv',
        'latex' and 'html' are not implemented yet and return None.
    model_info : optional
        Not used.

    Returns
    -------
    str or None
        The general table and the parameter table joined by a newline, or
        None for an output option that is not implemented.

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    import time

    # TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weighted least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model'}
    modeltype = self.model.__class__.__name__

    if title == 0:
        # unlisted model classes fall back to their class name instead of
        # raising KeyError
        title = model_types.get(modeltype, modeltype)
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    # General part of the summary table, Applicable to all? models
    # ------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes

    # Each data entry must be a sequence of cells. A bare string would be
    # iterated character by character, producing one cell per letter.
    yname_cells = [yname] if isinstance(yname, str) else list(yname)
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                ('Dependent Variable:', yname_cells),
                ('df model', [df_model])]
    gen_stubs_left, gen_data_left = zip(*gen_left)  # transpose row col

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype],  # was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid])
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    # Parameters part of the summary table
    # ------------------------------------
    # Names are standardized across models, so the statistics are computed
    # once; only the header differs (z for RLM, t for everything else).
    tstat = self.t()
    prob_stat = self.pvalues

    alp = str((1 - alpha) * 100) + '%'
    if modeltype == 'RLM':
        param_header = ['coef', 'std err', 'z', 'P>|z|',
                        alp + ' Conf. Interval']
    else:
        param_header = ['coef', 'std err', 't', 'P>|t|',
                        alp + ' Conf. Interval']

    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = range(len(xname))

    # Simpletable should be able to handle the formating
    params_data = list(zip(
        ["%#6.4g" % (params[i]) for i in exog_len],
        ["%#6.4f" % (std_err[i]) for i in exog_len],
        ["%#6.4f" % (tstat[i]) for i in exog_len],
        ["%#6.4f" % (prob_stat[i]) for i in exog_len],
        ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len]))
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    # special table
    # -------------
    # TODO: exists in linear_model, what about other models
    # residual diagnostics

    # output options
    # --------------
    # TODO: JP the rest needs to be fixed, similar to summary in linear_model
    if returns in ('print', 'text'):
        # 'text' is the default value of ``returns`` and is served the same
        # string as 'print'.
        return str(general_table) + '\n' + str(parameter_table)

    # 'table', 'csv', 'latex' and 'html' are not implemented yet
    return None
def summary(self):
    """
    Summarize the fitted Model

    Returns
    -------
    smry : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    title = model.__class__.__name__ + " Model Results"

    # Name of the dependent variable: first column of a DataFrame, or the
    # name of a Series. An unnamed Series keeps the "endog" default rather
    # than showing None in the table.
    dep_variable = "endog"
    orig_endog = self.model.data.orig_endog
    if isinstance(orig_endog, pd.DataFrame):
        dep_variable = orig_endog.columns[0]
    elif isinstance(orig_endog, pd.Series) and orig_endog.name is not None:
        dep_variable = orig_endog.name

    seasonal_periods = (None if self.model.seasonal is None
                        else self.model.seasonal_periods)
    lookup = {
        "add": "Additive",
        "additive": "Additive",
        "mul": "Multiplicative",
        "multiplicative": "Multiplicative",
        None: "None",
    }

    transform = self.params["use_boxcox"]
    box_cox_transform = bool(transform)
    # A string value of ``use_boxcox`` is shown as-is; otherwise the stored
    # "lamda" parameter is shown.
    box_cox_coeff = (transform if isinstance(transform, str)
                     else self.params["lamda"])
    if isinstance(box_cox_coeff, float):
        box_cox_coeff = "{:>10.5f}".format(box_cox_coeff)

    top_left = [
        ("Dep. Variable:", [dep_variable]),
        ("Model:", [model.__class__.__name__]),
        ("Optimized:", [str(np.any(self.optimized))]),
        ("Trend:", [lookup[self.model.trend]]),
        ("Seasonal:", [lookup[self.model.seasonal]]),
        ("Seasonal Periods:", [str(seasonal_periods)]),
        ("Box-Cox:", [str(box_cox_transform)]),
        ("Box-Cox Coeff.:", [str(box_cox_coeff)]),
    ]

    # Entries with a None value are passed through unchanged to
    # ``add_table_2cols``.
    top_right = [
        ("No. Observations:", [str(len(self.model.endog))]),
        ("SSE", ["{:5.3f}".format(self.sse)]),
        ("AIC", ["{:5.3f}".format(self.aic)]),
        ("BIC", ["{:5.3f}".format(self.bic)]),
        ("AICC", ["{:5.3f}".format(self.aicc)]),
        ("Date:", None),
        ("Time:", None),
    ]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    formatted = self.params_formatted  # type: pd.DataFrame

    def _fmt(x):
        """Right-align ``x`` in 20 characters with magnitude-based precision."""
        # nan and +/-inf have no usable base-10 magnitude; int(log10(inf))
        # would raise OverflowError, so print them verbatim.
        if not np.isfinite(x):
            return f"{str(x):>20}"
        abs_x = np.abs(x)
        scale = 1
        if abs_x != 0:
            scale = int(np.log10(abs_x))
        # very large or very small magnitudes use general format
        if scale > 4 or scale < -3:
            return "{:>20.5g}".format(x)
        dec = min(7 - scale, 7)
        fmt = "{{:>20.{0}f}}".format(dec)
        return fmt.format(x)

    # Columns are read by position: 0 -> code, 1 -> coefficient value,
    # 2 -> optimized flag.
    tab = [
        [
            _fmt(vals.iloc[1]),
            "{0:>20}".format(vals.iloc[0]),
            "{0:>20}".format(str(bool(vals.iloc[2]))),
        ]
        for _, vals in formatted.iterrows()
    ]
    params_table = SimpleTable(
        tab,
        headers=["coeff", "code", "optimized"],
        title="",
        stubs=list(formatted.index),
    )

    smry.tables.append(params_table)

    return smry