def test_SimpleTable_3(self):
    """Check that SimpleTable.extend() appends a second table below the first."""
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file; confirm its line breaks against the real as_text() output.
    desired = ''' ============================== header s1 header d1 ------------------------------ stub R1 C1 10.30312 10.73999 stub R2 C1 90.30312 90.73999 header s2 header d2 ------------------------------ stub R1 C2 50.95038 50.65765 stub R2 C2 40.95038 40.65765 ------------------------------ '''
    upper = SimpleTable([[10.30312, 10.73999], [90.30312, 90.73999]],
                        ['header s1', 'header d1'],
                        ['stub R1 C1', 'stub R2 C1'],
                        txt_fmt=default_txt_fmt)
    lower = SimpleTable([[50.95038, 50.65765], [40.95038, 40.65765]],
                        ['header s2', 'header d2'],
                        ['stub R1 C2', 'stub R2 C2'],
                        txt_fmt=default_txt_fmt)
    # extend() mutates `upper` in place; the rendered text is taken from it.
    upper.extend(lower)
    rendered = '\n%s\n' % upper.as_text()
    self.assertEqual(desired, str(rendered))
def summary_table(self, float_fmt="%6.3f"):
    '''Build a printable table of the influence and outlier measures.

    One column is produced per measure (observation index, endog, fitted
    value, Cook's distance, internally/externally studentized residuals,
    hat-matrix diagonal, both dffits variants and the dfbeta of column 1).

    Parameters
    ----------
    float_fmt : string
        format applied to every column except the first, which uses "%4d"

    Returns
    -------
    res : SimpleTable instance
        SimpleTable instance with the results, can be printed

    Notes
    -----
    The stacked data array is also stored on the instance as ``table_data``.
    '''
    # (column label, column values) pairs, in display order
    columns = [
        ('obs', np.arange(self.nobs)),
        ('endog', self.endog),
        ('fitted\nvalue', self.results.fittedvalues),
        ("Cook's\nd", self.cooks_distance[0]),
        ("student.\nresidual", self.resid_studentized_internal),
        ('hat diag', self.hat_matrix_diag),
        ('dffits \ninternal', self.dffits_internal[0]),
        ("ext.stud.\nresidual", self.resid_studentized_external),
        ('dffits', self.dffits[0]),
        ('dfbeta\nslope', self.dfbetas[:, 1]),
    ]
    colnames = tuple(label for label, _ in columns)
    data = np.column_stack([values for _, values in columns])
    self.table_data = data

    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy

    # work on copies so the shared module-level formats stay untouched
    txt_format = deepcopy(fmt_base)
    html_format = deepcopy(default_html_fmt)
    n_float_cols = data.shape[1] - 1
    txt_format['data_fmts'] = ["%4d"] + [float_fmt] * n_float_cols
    return SimpleTable(data, headers=colnames, txt_fmt=txt_format,
                       html_fmt=html_format)
def test_SimpleTable_1(self):
    """Render a 2x2 table with the default text format and compare the text."""
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file; confirm its line breaks against the real as_text() output.
    desired = ''' ===================== header1 header2 --------------------- stub1 1.30312 2.73999 stub2 1.95038 2.65765 --------------------- '''
    table = SimpleTable([[1.30312, 2.73999], [1.95038, 2.65765]],
                        ('header1', 'header2'),
                        ('stub1', 'stub2'),
                        txt_fmt=default_txt_fmt)
    rendered = '\n%s\n' % table.as_text()
    self.assertEqual(desired, str(rendered))
def summary_quantiles(self, idx, distppf, frac=None, varnames=None,
                      title=None):
    '''summary table for quantiles (critical values)

    Parameters
    ----------
    idx : None or list of integers
        List of indices into the Monte Carlo results (columns) that should
        be used in the calculation
    distppf : callable
        called with the probabilities (as a column) and expected to return
        the critical values of the reference distribution, one column per
        entry in idx (e.g. the ``ppf`` of a frozen distribution)
        TODO: use `crit` values instead or additional, see summary_cdf
    frac : None or array_like, float
        probabilities for which the quantiles are reported. If None, then
        [0.01, 0.025, 0.05, 0.1, 0.975] is used.
    varnames : None, or list of strings
        optional list of variable names, same length as idx
    title : None or string
        optional prefix for the table title

    Returns
    -------
    table : instance of SimpleTable
        use `print table` to see results

    '''
    # None sentinel instead of a list default, so the default cannot be
    # mutated across calls
    if frac is None:
        frac = [0.01, 0.025, 0.05, 0.1, 0.975]

    idx = np.atleast_1d(idx)  # assure iterable, use list ?

    quant, mcq = self.quantiles(idx, frac=frac)
    # not sure whether this will work with single quantile
    crit = distppf(np.atleast_2d(quant).T)

    # interleave Monte Carlo and reference-distribution columns per variable
    mml = []
    for i in range(len(idx)):
        mml.extend([mcq[:, i], crit[:, i]])
    mmlar = np.column_stack([quant] + mml)

    if title:
        title = title + ' Quantiles (critical values)'
    else:
        title = 'Quantiles (critical values)'

    # TODO use stub instead
    if varnames is None:
        # two columns per variable plus the leading probability column
        varnames = ['var%d' % i for i in range(mmlar.shape[1] // 2)]
    headers = ['\nprob'] + ['%s\n%s' % (i, t)
                            for i in varnames for t in ['mc', 'dist']]
    data_fmts = ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
    return SimpleTable(mmlar, txt_fmt={'data_fmts': data_fmts},
                       title=title, headers=headers)
def test_customlabel(self):
    """Limited test of custom cell labelling; does nothing without numpy."""
    if not has_numpy:
        return
    table = SimpleTable(table1data, test1header, test1stubs,
                        txt_fmt=txt_fmt1)
    # plant a missing value, then let the custom labeller tag the cells
    table[1][1].data = np.nan
    table.label_cells(custom_labeller)
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file; confirm its line breaks against the real as_text() output.
    desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * -- * 1 * * stub2 * 2.00 * 3 * ***************************** """
    rendered = table.as_text(missing="--")
    actual = "\n%s\n" % rendered
    self.assertEqual(actual, desired)
def test_SimpleTable_1(self):
    """Basic check of SimpleTable text output using default_txt_fmt."""
    rows = [[1.30312, 2.73999], [1.95038, 2.65765]]
    row_labels = ('stub1', 'stub2')
    column_labels = ('header1', 'header2')
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file; confirm its line breaks against the real as_text() output.
    desired = ''' ===================== header1 header2 --------------------- stub1 1.30312 2.73999 stub2 1.95038 2.65765 --------------------- '''
    table = SimpleTable(rows, column_labels, row_labels,
                        txt_fmt=default_txt_fmt)
    text = '\n%s\n' % table.as_text()
    self.assertEqual(desired, str(text))
def summary_cdf(self, idx, frac, crit, varnames=None, title=None):
    '''summary table for cumulative density function

    Parameters
    ----------
    idx : None or list of integers
        List of indices into the Monte Carlo results (columns) that should
        be used in the calculation
    frac : array_like, float
        probabilities, shown as the first column of the table
    crit : array_like
        values for which cdf is calculated; column ``i`` is used for the
        variable ``idx[i]``
    varnames : None, or list of strings
        optional list of variable names, same length as idx
    title : None or string
        optional prefix for the table title

    Returns
    -------
    table : instance of SimpleTable
        use `print table` to see results

    '''
    idx = np.atleast_1d(idx)  # assure iterable, use list ?

    # TODO: need broadcasting in cdf
    mml = [self.cdf(crit[:, i], [ix])[1].ravel()
           for i, ix in enumerate(idx)]
    mmlar = np.column_stack([frac] + mml)

    if title:
        title = title + ' Probabilities'
    else:
        title = 'Probabilities'

    # TODO use stub instead
    if varnames is None:
        varnames = ['var%d' % i for i in range(mmlar.shape[1] - 1)]
    headers = ['prob'] + varnames
    # one format per table column: the probability column plus one per
    # variable (the count must come from the stacked table, not from the
    # number of probabilities)
    data_fmts = ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
    return SimpleTable(mmlar, txt_fmt={'data_fmts': data_fmts},
                       title=title, headers=headers)
def test_customlabel(self):
    """Check that a NaN cell labelled by custom_labeller renders as '--'."""
    if not has_numpy:
        return
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file; confirm its line breaks against the real as_text() output.
    desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * -- * 1 * * stub2 * 2.00 * 3 * ***************************** """
    labelled = SimpleTable(table1data, test1header, test1stubs,
                           txt_fmt=txt_fmt1)
    target_cell = labelled[1][1]
    target_cell.data = np.nan
    labelled.label_cells(custom_labeller)
    actual = '\n%s\n' % labelled.as_text(missing='--')
    self.assertEqual(actual, desired)
def summary_proc(self, g):
    """Print the survival table for group ``g``.

    For internal use.

    Parameters
    ----------
    g : index
        key used to look up ``self.results``, ``self.ts`` and (when exog
        is set) ``self.groups``

    Returns
    -------
    None; the table is printed.
    """
    # identity test: `!= None` on an ndarray compares elementwise and has no
    # single truth value
    if self.exog is not None:
        myTitle = 'exog = ' + str(self.groups[g]) + '\n'
    else:
        myTitle = "Kaplan-Meier Curve"
    table = np.transpose(self.results[g])
    # put the time column in front of the result columns
    table = np.c_[np.transpose(self.ts[g]), table]
    table = SimpleTable(table, headers=['Time', 'Survival', 'Std. Err'],
                        title=myTitle)
    print(table)
def summary_find_nfact(self):
    '''Summarize the search for the number of factors as text.

    Runs ``fit_find_nfact`` first if no results are stored yet.

    Returns
    -------
    sumstr : string
        summary of the results for selecting the number of factors

    '''
    if not hasattr(self, 'results_find_nfact'):
        self.fit_find_nfact()
    results = self.results_find_nfact

    # every collected line is emitted with a leading newline at the end
    lines = ['Best result for k, by AIC, BIC, R2_adj, L1O']
    lines.append(' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact))

    from gwstatsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                           default_latex_fmt,
                                           default_html_fmt)
    column_names = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
    column_formats = ['%6d'] + ['%10.3f'] * 4
    table = SimpleTable(results, column_names, None,
                        txt_fmt=dict(data_fmts=column_formats))

    lines.append("PCA regression on simulated data,")
    lines.append("DGP: 2 factors and 4 explanatory variables")
    lines.append(str(table))
    lines.append("Notes: k is number of components of PCA,")
    lines.append(" constant is added additionally")
    lines.append(" k=0 means regression on constant only")
    lines.append(" L1O: sum of squared prediction errors for leave-one-out")
    return ''.join('\n' + line for line in lines)
def summary_find_nfact(self):
    '''Return a text report on the choice of the number of factors.

    The report lists the best k by each criterion, followed by the full
    results table and explanatory notes. ``fit_find_nfact`` is called when
    the instance has no ``results_find_nfact`` yet.

    Returns
    -------
    sumstr : string
        summary of the results for selecting the number of factors

    '''
    if not hasattr(self, 'results_find_nfact'):
        self.fit_find_nfact()
    results = self.results_find_nfact

    best_line = ' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact)

    from gwstatsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                           default_latex_fmt,
                                           default_html_fmt)
    headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
    fmt = dict(data_fmts=['%6d'] + ['%10.3f'] * 4)
    results_table = SimpleTable(results, headers, None, txt_fmt=fmt)

    report = [
        'Best result for k, by AIC, BIC, R2_adj, L1O',
        best_line,
        "PCA regression on simulated data,",
        "DGP: 2 factors and 4 explanatory variables",
        str(results_table),
        "Notes: k is number of components of PCA,",
        " constant is added additionally",
        " k=0 means regression on constant only",
        " L1O: sum of squared prediction errors for leave-one-out",
    ]
    # the text starts with a newline and has none at the end
    return '\n' + '\n'.join(report)
def print_summary(self, stats, orientation='auto'):
    '''Build a SimpleTable of the requested summary statistics.

    Parameters
    ----------
    stats : list of strings
        keys into ``self.univariate``; one table column per statistic
        unless the table is transposed
    orientation : string
        'varcols' transposes the table so that the statistics become rows;
        'auto' transposes only when there are fewer stubs than statistics;
        any other value keeps the statistics as columns

    Returns
    -------
    table : SimpleTable instance
    '''
    # TODO: need to specify a table formatting for the numbers, using default
    title = 'Summary Statistics'
    header = stats
    stubs = self.univariate['obs'][1]
    nrows = len(self.univariate['obs'][2])
    data = [[self.univariate[astat][2][col] for astat in stats]
            for col in range(nrows)]

    transpose = (orientation == 'varcols' or
                 (orientation == 'auto' and len(stubs) < len(header)))
    if transpose:
        # swap rows and columns; build a real list so the result can be
        # indexed and measured on Python 3 as well
        data = [list(row) for row in zip(*data)]
        header, stubs = stubs, header

    part_fmt = dict(data_fmts=["%#8.4g"] * (len(header) - 1))
    table = SimpleTable(data, header, stubs, title=title, txt_fmt=part_fmt)
    return table
title_align='r', header_align='r', data_aligns="r", stubs_align="l", fmt='txt') cell0data = 0.0000 cell1data = 1 row0data = [cell0data, cell1data] row1data = [2, 3.333] table1data = [row0data, row1data] test1stubs = ('stub1', 'stub2') test1header = ('header1', 'header2') #test1header = ('header1\nheader1a', 'header2\nheader2a') tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1, ltx_fmt=ltx_fmt1, html_fmt=html_fmt1) def custom_labeller(cell): if cell.data is np.nan: return 'missing' class test_Cell(unittest.TestCase): def test_celldata(self): celldata = cell0data, cell1data, row1data[0], row1data[1] cells = [ Cell(datum, datatype=i % 2) for i, datum in enumerate(celldata) ]
def test_SimpleTable_4(self):
    """Build a table with a custom txt_fmt plus copies of the default latex/html formats.

    The three helper tests defined inside compare the text, LaTeX and HTML
    renderings of that table with expected literals.
    """
    # custom text format: '*' borders and separators, two data formats
    txt_fmt1 = dict(data_fmts=['%3.2f', '%d'], empty_cell=' ', colwidths=1, colsep=' * ', row_pre='* ', row_post=' *', table_dec_above='*', table_dec_below='*', header_dec_below='*', header_fmt='%s', stub_fmt='%s', title_align='r', header_align='r', data_aligns="r", stubs_align="l", fmt='txt')
    ltx_fmt1 = default_latex_fmt.copy()
    html_fmt1 = default_html_fmt.copy()
    cell0data = 0.0000
    cell1data = 1
    row0data = [cell0data, cell1data]
    row1data = [2, 3.333]
    table1data = [row0data, row1data]
    test1stubs = ('stub1', 'stub2')
    test1header = ('header1', 'header2')
    tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1, ltx_fmt=ltx_fmt1, html_fmt=html_fmt1)

    # NOTE(review): the helpers below are shown nested because they read the
    # local `tbl`; the original indentation is not recoverable here - confirm.
    # NOTE(review): none of them is called in the visible code - confirm that
    # they are actually executed.
    # NOTE(review): the expected literals appear on a single line in this
    # file; confirm their line breaks against the real rendered output.
    def test_txt_fmt1(self):
        """Limited test of custom txt_fmt"""
        desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * 0.00 * 1 * * stub2 * 2.00 * 3 * ***************************** """
        actual = '\n%s\n' % tbl.as_text()
        #print(actual)
        #print(desired)
        self.assertEqual(actual, desired)

    def test_ltx_fmt1(self):
        """Limited test of custom ltx_fmt"""
        desired = r""" \begin{tabular}{lcc} \toprule & \textbf{header1} & \textbf{header2} \\ \midrule \textbf{stub1} & 0.0 & 1 \\ \textbf{stub2} & 2 & 3.333 \\ \bottomrule \end{tabular} """
        actual = '\n%s\n' % tbl.as_latex_tabular()
        #print(actual)
        #print(desired)
        self.assertEqual(actual, desired)

    def test_html_fmt1(self):
        """Limited test of custom html_fmt"""
        desired = """ <table class="simpletable"> <tr> <td></td> <th>header1</th> <th>header2</th> </tr> <tr> <th>stub1</th> <td>0.0</td> <td>1</td> </tr> <tr> <th>stub2</th> <td>2</td> <td>3.333</td> </tr> </table> """
        actual = '\n%s\n' % tbl.as_html()
        self.assertEqual(actual, desired)
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
        regression results; fittedvalues, resid, mse_resid, df_resid, nobs
        and model.endog are read from it
    alpha : float
        significance level for confidence interval. It is used for both
        the mean and the prediction interval and is reflected in the
        column labels.

    Returns
    -------
    st : SimpleTable instance
        table with results that can be printed
    data : ndarray
        calculated measures and statistics for the table
    ss2 : list of strings
        column_names for table (Note: rows of table are observations)

    '''
    from copy import deepcopy

    from scipy import stats

    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    # standard error for predicted mean
    # Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci_low = res.fittedvalues - tppf * predict_mean_se
    predict_mean_ci_upp = res.fittedvalues + tppf * predict_mean_se

    # standard error for predicted observation; pass alpha on so that both
    # intervals use the same level
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(
        res, alpha=alpha)

    # standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    data = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci_low,
        predict_mean_ci_upp,
        predict_ci_low,
        predict_ci_upp,
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance[0],
    ])

    # label the intervals with the requested level ('95%' for alpha=0.05)
    level = '%g%%' % ((1 - alpha) * 100)
    ss2 = [
        'Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
        'Std Error\nMean Predict',
        'Mean ci\n%s low' % level, 'Mean ci\n%s upp' % level,
        'Predict ci\n%s low' % level, 'Predict ci\n%s upp' % level,
        'Residual', 'Std Error\nResidual', 'Student\nResidual', "Cook's\nD",
    ]

    # copies, so the shared module-level formats are not modified
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)

    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)
    return st, data, ss2
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list of strings
        additional attributes to add below a parameter row, e.g. bse or
        tvalues
    endog_names : None or list of strings
        names for rows of the parameter array (multivariate endog)
    exog_names : None or list of strings
        names for columns of the parameter array (exog)
    title : None or string

    Returns
    -------
    table : SimpleTable
        table with one row per row of the parameter array, each followed
        by one row per entry in extras (shown in parentheses)

    '''
    if endog_names is None:
        # TODO: note the [1:] is specific to current MNLogit
        endog_names = ['endog_%d' % i
                       for i in np.unique(result.model.endog)[1:]]
    if exog_names is None:
        # headers label the columns of params, so count columns, not rows
        exog_names = ['var%d' % i for i in range(result.params.shape[1])]

    # TODO: check formatting options with different values
    res_params = [[forg(item, prec=4) for item in row]
                  for row in result.params]
    if extras:  # not None or non-empty
        extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                         for v in col]
                        for col in getattr(result, what)]
                       for what in extras]
        # interleave: parameter row, then its extras rows
        data = [row
                for group in zip(res_params, *extras_list)
                for row in group]
        # extras rows get an empty stub
        blank_stubs = [[''] * len(endog_names)] * len(extras)
        stubs = [stub
                 for group in zip(endog_names, *blank_stubs)
                 for stub in group]
    else:
        data = res_params
        stubs = endog_names

    import copy
    # cells are already formatted strings, so pass them through unchanged
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt.update(dict(data_fmts=["%s"] * result.params.shape[1]))

    return SimpleTable(data, headers=exog_names, stubs=stubs, title=title,
                       txt_fmt=txt_fmt)
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False):
    '''create a summary table for the parameters

    Parameters
    ----------
    results : results instance or tuple
        some required information is directly taken from the result
        instance. A tuple ``(results, params, std_err, tvalues, pvalues,
        conf_int)`` can be given instead, in which case the statistics are
        taken from the tuple.
    yname : string or None
        optional name for the endogenous variable, default is "y"
    xname : list of strings or None
        optional names for the exogenous variables, default is "var_xx"
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        indicator whether the p-values are based on the Student-t
        distribution (if True) or on the normal distribution (if False)
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.

    Returns
    -------
    params_table : SimpleTable instance
    '''
    if isinstance(results, tuple):
        # for multivariate endog
        # TODO: check whether I don't want to refactor this
        # we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  # is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    if skip_header:
        param_header = None
    else:
        alp = str((1 - alpha) * 100) + '%'
        stat_label, pval_label = ('t', 'P>|t|') if use_t else ('z', 'P>|z|')
        param_header = ['coef', 'std err', stat_label, pval_label,
                        '[' + alp + ' Conf. Int.]']

    _, xname = _getnames(results, yname=yname, xname=xname)
    params_stubs = xname

    # range and list results are used instead of xrange/map/zip objects so
    # the values can be iterated more than once on Python 3 as well
    exog_idx = range(len(xname))

    confint = ["%s %s" % tuple(map(forg, conf_int[i])) for i in exog_idx]
    # center confidence intervals if they are unequal lengths
    len_ci = [len(ci) for ci in confint]
    if len_ci:
        max_ci = max(len_ci)
        if min(len_ci) < max_ci:
            confint = [ci.center(max_ci) for ci in confint]

    # explicit f/g formatting, uses forg: f or g depending on values
    params_data = list(zip([forg(params[i], prec=4) for i in exog_idx],
                           [forg(std_err[i]) for i in exog_idx],
                           [forg(tvalues[i]) for i in exog_idx],
                           ["%#6.3f" % (pvalues[i]) for i in exog_idx],
                           confint))
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_params)
    return parameter_table
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    '''generate top table(s)

    Parameters
    ----------
    results : results instance
        source of the default values (model class name, nobs, df_model,
        df_resid, llf)
    title : None or string
        table title; by default the model class name followed by
        ' Regression Results'
    gleft, gright : None or list of (label, value) tuples
        rows of the left and right half of the table. A value of None is
        replaced by the default for that label; an unknown label with a
        None value raises KeyError. If gleft is None, a default left half
        is used and gright is ignored.
    yname, xname : None, string or list of strings
        passed on to _getnames

    Returns
    -------
    general_table : SimpleTable instance
        left table, extended to the right by the right table if any

    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?
    '''
    # time and names are always included
    import time

    gen_left, gen_right = gleft, gright

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # defaults are lambdas because some values raise an exception if they
    # are not available; alternate spellings are left out to force unique
    # labels
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: ["%#6d" % results.nobs],
        # TODO: check when we have non-integer df
        'Df Model:': lambda: ["%#6d" % results.df_model],
        'Df Residuals:': lambda: ["%#6d" % results.df_resid],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gen_left is None:
        # default: general part of the summary table; every label here
        # must be a key of default_items
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:
            # e.g. AttributeError, NotImplementedError: no llf row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        gen_right = []

    gen_title = title
    gen_header = None

    def _fill_defaults(items):
        # replace missing (None) values with default values; KeyError for
        # unknown labels is raised on purpose
        filled = []
        for item, value in items:
            if value is None:
                value = default_items[item]()
            filled.append((item, value))
        return filled

    gen_left = _fill_defaults(gen_left)
    # always work on a new list so that None is accepted and the caller's
    # list is not modified by the padding below
    gen_right = _fill_defaults(gen_right) if gen_right else []

    # check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    # pad both tables to equal number of rows
    if gen_right:
        if len(gen_right) < len(gen_left):
            # fill up with blank lines to same length
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            # fill up with blank lines to same length, to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work like I want
        # force extra spacing and exact string length in right table
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        # transpose rows and columns
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems works with []

    # built after the padding so that the left table matches the right one
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left
    return general_table
def summary(self, yname=None, xname=None, title=0, alpha=.05, returns='text', model_info=None):
    """Build the general and the parameter table of the fit and return them as text.

    Parameters
    ----------
    yname : string
        optional, taken from ``self.model.endog_names`` if available,
        otherwise 'y'
    xname : list of strings
        optional, taken from ``self.model.exog_names`` if available,
        otherwise 'var_#' for # in the number of parameters
    title : string
        optional; with the default 0 the title is looked up from the model
        class name
    alpha : float
        significance level passed to ``conf_int`` and shown in the header
    returns : string
        only 'print' is handled in the code visible here
    model_info : None
        not used in the code visible here

    Returns
    -------
    For returns='print': string with the general table followed by the
    parameter table.
    NOTE(review): the signature default is returns='text', for which the
    code visible here returns None - confirm whether more branches follow.

    Examples (needs updating)
    --------
    >>> import gwstatsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    import time as time

    #TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS' : 'Ordinary least squares',
                   'GLS' : 'Generalized least squares',
                   'GLSAR' : 'Generalized least squares with AR(p)',
                   'WLS' : 'Weigthed least squares',
                   'RLM' : 'Robust linear model',
                   'GLM' : 'Generalized linear model'
                   }
    # NOTE(review): model_methods is not read in the code visible here
    model_methods = {'OLS' : 'Least Squares',
                     'GLS' : 'Least Squares',
                     'GLSAR' : 'Least Squares',
                     'WLS' : 'Least Squares',
                     'RLM' : '?',
                     'GLM' : '?'
                     }
    if title==0:
        title = model_types[self.model.__class__.__name__]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    #dist_family = self.model.family.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    #replace definition of stubs data by single list
    #e.g.
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                ('Dependent Variable:', yname), #What happens with multiple names?
                ('df model', [df_model])
                ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title = gen_title,
                                 txt_fmt = gen_fmt
                                 )

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid'
                       )
    gen_data_right = ([modeltype], #was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid]
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title = gen_title,
                                  txt_fmt = gen_fmt
                                  )
    # the right half is attached to the left table in place
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    #Note: this is not necessary since we standardized names, only t versus normal
    # every entry calls the same method; the dicts only select by model type
    tstats = {'OLS' : self.t(),
              'GLS' : self.t(),
              'GLSAR' : self.t(),
              'WLS' : self.t(),
              'RLM' : self.t(),
              'GLM' : self.t()
              }
    prob_stats = {'OLS' : self.pvalues,
                  'GLS' : self.pvalues,
                  'GLSAR' : self.pvalues,
                  'WLS' : self.pvalues,
                  'RLM' : self.pvalues,
                  'GLM' : self.pvalues
                  }
    #Dictionary to store the header names for the parameter part of the
    #summary table. look up by modeltype
    alp = str((1-alpha)*100)+'%'
    param_header = {
        'OLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'WLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLM' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution
        'RLM' : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval'] #check z
        }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = xrange(len(xname))
    # a model type missing from the dicts above raises KeyError here
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # Simpletable should be able to handle the formatting
    params_data = zip(["%#6.4g" % (params[i]) for i in exog_len],
                      ["%#6.4f" % (std_err[i]) for i in exog_len],
                      ["%#6.4f" % (tstat[i]) for i in exog_len],
                      ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                      ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in \
                       exog_len]
                      )
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title = None,
                                  txt_fmt = fmt_2, #gen_fmt,
                                  )

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model
    def ols_printer():
        """
        return the general and the parameter table as one string
        """
        table = str(general_table)+'\n'+str(parameter_table)
        return table

    def ols_to_csv():
        """
        placeholder for a csv export of the summary data; does nothing
        """
        pass

    def glm_printer():
        # same text as ols_printer
        table = str(general_table)+'\n'+str(parameter_table)
        return table
        pass

    printers = {'OLS': ols_printer,
                'GLM' : glm_printer
                }
    if returns=='print':
        try:
            return printers[modeltype]()
        except KeyError:
            # model types without their own printer use the OLS layout
            return printers['OLS']()