def _stats_table(self):
    """Return the overall fit statistics as a two-column text table.

    The left half lists the number of equations, the number of
    observations, the log likelihood and AIC; the right half lists BIC,
    HQIC, FPE and Det(Omega_mle).  Every value is read from ``self.model``
    and both halves are rendered with ``self.part2_fmt``.

    Returns
    -------
    str
        The right half appended to the right of the left half, as text.
    """
    # TODO: do we want individual statistics or should users just
    # use results if wanted?
    fit = self.model
    left_rows = (
        ('No. of Equations:', fit.neqs),
        ('Nobs:', fit.nobs),
        ('Log likelihood:', fit.llf),
        ('AIC:', fit.aic),
    )
    right_rows = (
        ('BIC:', fit.bic),
        ('HQIC:', fit.hqic),
        ('FPE:', fit.fpe),
        ('Det(Omega_mle):', fit.detomega),
    )

    def build(rows):
        # One single-cell data row per statistic; labels become the stubs.
        stubs = tuple(label for label, _ in rows)
        cells = [[value] for _, value in rows]
        return SimpleTable(cells, None, stubs, txt_fmt=self.part2_fmt)

    combined = build(left_rows)
    combined.extend_right(build(right_rows))
    return str(combined)
def _header_table(self):
    """Return the report header (model, method, date, time) as text.

    The model type is read from ``self.model._model_type``; the date and
    time are taken from the local clock at call time.  The table is
    rendered with ``self.part1_fmt``.
    """
    from time import localtime, strftime

    now = localtime()
    fit = self.model

    # TODO: change when we allow coef restrictions
    # ncoefs = len(model.beta)

    rows = [
        [fit._model_type],
        ["OLS"],  # TODO: change when fit methods change
        [strftime("%a, %d, %b, %Y", now)],
        [strftime("%H:%M:%S", now)],
    ]
    labels = ('Model:', 'Method:', 'Date:', 'Time:')
    header = SimpleTable(rows, None, labels,
                         title="Summary of Regression Results",
                         txt_fmt=self.part1_fmt)
    return str(header)
def print_ic_table(ics, selected_orders):
    """Print the VAR order-selection table.

    Parameters
    ----------
    ics : mapping
        Maps each criterion name to a sequence of numeric values; the
        sorted names become the table columns and each value a row cell.
    selected_orders : mapping
        Maps each criterion name to the (int-convertible) row index whose
        cell is flagged with a trailing ``*``.
    """
    # Can factor this out into a utility method if so desired
    criteria = sorted(ics)
    formatted = [["%#10.4g" % value for value in ics[name]]
                 for name in criteria]
    cells = mat(formatted, dtype=object).T

    # Flag the selected row of every criterion column.
    for col_pos, name in enumerate(criteria):
        row_pos = int(selected_orders[name])
        cells[row_pos, col_pos] = cells[row_pos, col_pos] + '*'

    txt_fmt = dict(_default_table_fmt, data_fmts=("%s", ) * len(criteria))
    table = SimpleTable(cells, criteria, lrange(len(cells)),
                        title='VAR Order Selection', txt_fmt=txt_fmt)
    print(str(table) + '\n' + '* Minimum' + '\n')
def _coef_table(self):
    """Return one coefficient table per equation as a single string.

    The flattened (transposed) params, standard errors, t-values and
    p-values of ``self.model`` are zipped into rows and cut into
    consecutive slices of ``neqs * k_ar + k_trend`` rows, one slice per
    equation.  Each slice is printed under a
    "Results for equation <name>" heading, and sections are separated by
    a blank line.
    """
    fit = self.model
    n_eqs = fit.neqs
    row_labels = self.model.exog_names
    rows = lzip(fit.params.T.ravel(), fit.stderr.T.ravel(),
                fit.tvalues.T.ravel(), fit.pvalues.T.ravel())
    columns = ('coefficient', 'std. error', 't-stat', 'prob')
    rows_per_eq = n_eqs * fit.k_ar + fit.k_trend

    sections = []
    for eq in range(n_eqs):
        heading = "Results for equation %s" % fit.names[eq]
        start = rows_per_eq * eq
        stop = rows_per_eq * (eq + 1)
        table = SimpleTable(rows[start:stop], columns, row_labels,
                            title=None, txt_fmt=self.default_fmt)
        sections.append(heading + '\n' + str(table) + '\n')

    # A single newline between sections yields the blank separator line.
    return '\n'.join(sections)
def _twocol_summary(self, lhs_data, rhs_data, title=None):
    """Build a side-by-side table from two ``{row name: value}`` dicts.

    Parameters
    ----------
    lhs_data, rhs_data : dict
        Row name -> row data for the left and right halves.  A ``':'`` is
        appended to every row name to form the stub.
    title : optional
        Passed through to both ``SimpleTable`` instances.

    Returns
    -------
    SimpleTable
        The left table with the right table appended via ``extend_right``.

    Raises
    ------
    ValueError
        If either argument is not a ``dict``.
    """
    both_are_dicts = isinstance(lhs_data, dict) and isinstance(rhs_data, dict)
    if not both_are_dicts:
        raise ValueError("Args must be type `dict`")

    def split(mapping):
        # Separate keys from values and turn the keys into "name:" stubs.
        names, values = zip(*mapping.items())
        return [name + ':' for name in names], values

    left_stubs, left_values = split(lhs_data)
    right_stubs, right_values = split(rhs_data)

    combined = SimpleTable(left_values, None, left_stubs, title)
    right = SimpleTable(right_values, None, right_stubs, title)
    combined.extend_right(right)
    return combined
def summary_coeff(self):
    """Return the estimated coefficients as a two-way ``SimpleTable``.

    ``self.params`` is reshaped to ``(self.nlevel1, self.nlevel2)``; rows
    are labelled with ``self.d1_labels`` and columns with
    ``self.d2_labels``.  Every column is formatted with ``%#10.4g``.
    """
    from statsmodels.iolib import SimpleTable

    n_rows, n_cols = self.nlevel1, self.nlevel2
    coef_grid = self.params.reshape(n_rows, n_cols)
    row_labels = self.d1_labels
    col_labels = self.d2_labels
    txt_fmt = {'data_fmts': ["%#10.4g" for _ in range(n_cols)]}
    return SimpleTable(coef_grid, col_labels, row_labels,
                       title='Estimated Coefficients by factors',
                       txt_fmt=txt_fmt)
def print_results(res):
    """Print a multi-section report for a group-wise regression result.

    Sections, in order: the table of F-tests for overall/pairwise equality
    of coefficients, a comparison with ``stats.f_oneway``, the likelihood
    ratio test, the OLS parameters of each group, and the per-group
    variance / standard deviation.

    Parameters
    ----------
    res : object
        Must provide ``groups``, ``unique``, ``endog``, ``olsbygroup``,
        ``sigmabygroup`` and the methods ``ftest_summary()`` and
        ``lr_test()``.

    Notes
    -----
    A row of the F-test table is starred when its p-value (second entry of
    the row's result) is below 0.05, matching the printed legend
    "reject at 5%".  The dependent variable is read from ``res.endog``
    instead of relying on a module-level ``y``.
    """
    from statsmodels.iolib import SimpleTable

    groupind = res.groups
    # ftest_summary() returns (text, table rows); only the rows are used
    # because the table rendering is nicer than the raw text.
    ft = res.ftest_summary()

    print(
        '\nTable of F-tests for overall or pairwise equality of coefficients')
    ftest_rows = []
    for row in ft[1]:
        # row[1][1] is the p-value; star marks rejection at the 5% level.
        star = ['*'] if row[1][1] < 0.05 else []
        ftest_rows.append(['%r' % (row[0], )] + list(row[1]) + star)
    print(
        SimpleTable(
            ftest_rows,
            headers=['pair', 'F-statistic', 'p-value', 'df_denom', 'df_num']))
    print('Notes: p-values are not corrected for many tests')
    print(' (no Bonferroni correction)')
    print(' * : reject at 5% uncorrected confidence level')
    print('Null hypothesis: all or pairwise coefficient are the same')
    print('Alternative hypothesis: all coefficients are different')

    print('\nComparison with stats.f_oneway')
    print(stats.f_oneway(*[res.endog[groupind == gr] for gr in res.unique]))

    print('\nLikelihood Ratio Test')
    print('likelihood ratio p-value df')
    print(res.lr_test())
    print('Null model: pooled all coefficients are the same across groups,')
    print('Alternative model: all coefficients are allowed to be different')
    print('not verified but looks close to f-test result')

    print(
        '\nOls parameters by group from individual, separate ols regressions')
    for group in sorted(res.olsbygroup):
        r = res.olsbygroup[group]
        print(group, r.params)

    print('\nCheck for heteroscedasticity, ')
    print('variance and standard deviation for individual regressions')
    print(' ' * 12, ' '.join('group %-10s' % (gr) for gr in res.unique))
    print('variance ', res.sigmabygroup)
    print('standard dev', np.sqrt(res.sigmabygroup))
def hypothesis_test_table(results, title, null_hyp):
    """Format the outcome of a hypothesis test as a text block.

    Parameters
    ----------
    results : mapping
        Must contain ``'statistic'``, ``'crit_value'``, ``'pvalue'``,
        ``'df'``, ``'conclusion'`` and ``'signif'``.
    title : str
        First line of the output.
    null_hyp : str
        Description of the null hypothesis, printed below the table.

    Returns
    -------
    str
        Title, one-row table, null hypothesis and a conclusion line (the
        conclusion line has no trailing newline).
    """
    txt_fmt = dict(_default_table_fmt,
                   data_fmts=["%#15.6F", "%#15.6F", "%#15.3F", "%s"])
    row = [
        results['statistic'],
        results['crit_value'],
        results['pvalue'],
        str(results['df']),
    ]
    column_names = ['Test statistic', 'Critical Value', 'p-value', 'df']
    table = SimpleTable([row], column_names, [''], title=None,
                        txt_fmt=txt_fmt)

    pieces = [
        title + '\n',
        str(table) + '\n',
        null_hyp + '\n',
        "Conclusion: %s H_0" % results['conclusion'],
        " at %.2f%% significance level" % (results['signif'] * 100),
    ]
    return ''.join(pieces)
def get_ic_table(ics, selected_orders):
    '''
    Convert lag-order selection results into a formatted text table.

    :param ics: mapping of criterion name -> sequence of numeric values;
        the sorted names become the columns and each value a row cell
    :param selected_orders: mapping of criterion name -> (int-convertible)
        row index whose cell is flagged with a trailing ``*``
    :return: the rendered table followed by a ``* Minimum`` legend line
    '''
    base_fmt = {
        'empty_cell': '',
        'colsep': ' ',
        'row_pre': '',
        'row_post': '',
        'table_dec_above': '=',
        'table_dec_below': '=',
        'header_dec_below': '-',
        'header_fmt': '%s',
        'stub_fmt': '%s',
        'title_align': 'c',
        'header_align': 'r',
        'data_aligns': 'r',
        'stubs_align': 'l',
        'fmt': 'txt',
    }

    criteria = sorted(ics)
    formatted = [["%#10.4g" % value for value in ics[name]]
                 for name in criteria]
    cells = np.array(formatted, dtype=object).T

    # Flag the selected row of every criterion column.
    for col_pos, name in enumerate(criteria):
        row_pos = int(selected_orders[name])
        cells[row_pos, col_pos] = cells[row_pos, col_pos] + '*'

    txt_fmt = dict(base_fmt, data_fmts=("%s", ) * len(criteria))
    table = SimpleTable(cells, criteria, lrange(len(cells)),
                        title='VAR Order Selection', txt_fmt=txt_fmt)
    return str(table) + '\n' + '* Minimum' + '\n'
def print_summary(self, res):
    """Return a printable, multi-section summary string for ``res``.

    Sections, in order: the table of F-tests for overall/pairwise equality
    of coefficients, a comparison with ``stats.f_oneway``, the likelihood
    ratio test, the OLS parameters of each group, and the per-group
    variance / standard deviation.

    Parameters
    ----------
    res : object
        Must provide ``groups``, ``unique``, ``endog``, ``olsbygroup``,
        ``sigmabygroup`` and the methods ``ftest_summary()`` and
        ``lr_test()``.

    Returns
    -------
    str
        The filled-in report text.

    Notes
    -----
    * If ``self`` already has a ``summarytable`` attribute it is reused;
      otherwise the table is obtained from ``res.ftest_summary()``.
    * A row of the F-test table is starred when its p-value (second entry
      of the row's result) is below 0.05, matching the legend
      "reject at 5%".
    """
    from statsmodels.iolib import SimpleTable

    groupind = res.groups
    # The original test, hasattr(res, 'self.summarytable'), could never be
    # true; check the attribute that the branch actually reads.
    if hasattr(self, 'summarytable'):
        summtable = self.summarytable
    else:
        _, summtable = res.ftest_summary()

    templ = '''Table of F-tests for overall or pairwise equality of coefficients
%(tab)s

Notes: p-values are not corrected for many tests
       (no Bonferroni correction)
       * : reject at 5%% uncorrected confidence level
Null hypothesis: all or pairwise coefficient are the same
Alternative hypothesis: all coefficients are different

Comparison with stats.f_oneway
%(statsfow)s

Likelihood Ratio Test
%(lrtest)s
Null model: pooled all coefficients are the same across groups,
Alternative model: all coefficients are allowed to be different
not verified but looks close to f-test result

OLS parameters by group from individual, separate ols regressions
%(olsbg)s

Check for heteroscedasticity,
variance and standard deviation for individual regressions
%(grh)s
'''

    ftest_rows = []
    for row in summtable:
        # row[1][1] is the p-value; star marks rejection at the 5% level.
        star = ['*'] if row[1][1] < 0.05 else []
        ftest_rows.append(['%r' % (row[0],)] + list(row[1]) + star)

    resvals = {}
    resvals['tab'] = str(SimpleTable(
        ftest_rows,
        headers=['pair', 'F-statistic', 'p-value', 'df_denom', 'df_num']))
    resvals['statsfow'] = str(stats.f_oneway(
        *[res.endog[groupind == gr] for gr in res.unique]))
    resvals['lrtest'] = str(SimpleTable(
        [res.lr_test()],
        headers=['likelihood ratio', 'p-value', 'df']))
    resvals['olsbg'] = str(SimpleTable(
        [[group] + res.olsbygroup[group].params.tolist()
         for group in sorted(res.olsbygroup)]))
    # Row labels replace the stray "variance"/"standard dev" source lines
    # that used to be embedded verbatim in the template.
    resvals['grh'] = str(SimpleTable(
        np.vstack([res.sigmabygroup, np.sqrt(res.sigmabygroup)]),
        headers=res.unique.tolist(),
        stubs=['variance', 'standard dev']))

    return templ % resvals
# NOTE(review): the five assignments below look like the tail of a
# definition whose header is not visible in this chunk (c1, c2, c3, nc and
# mc2mvsk are not defined here) -- confirm their original indentation.
mc1 = c1.item()
mc2 = (c2*nc**np.array([2,0])).sum()
mc3 = (c3*nc**np.array([3,1])).sum()
# NOTE(review): chained assignment -- this rebinds BOTH mc4 and c4 to the
# literal array; unlike mc2/mc3 no sum over powers of nc is taken. Possibly
# unfinished -- confirm intent.
mc4 = c4=np.array([0.0425458, 1.17491, 6.25])
mvsk_nc = mc2mvsk((mc1,mc2,mc3,mc4))

# Script entry point: run the check, then print tables of the entries of
# `res` whose two sets of statistics (v[1] vs v[2]) differ by more than a
# tolerance.
if __name__ == '__main__':
    check_cont_basic()
    # NOTE(review): `res` is not assigned anywhere in the visible code;
    # presumably a module-level dict populated elsewhere -- verify.
    #print [(k, v[0]) for k,v in res.items() if np.abs(v[0]-1)>1e-3]
    #print [(k, v[2][0], 1+2*v[2][0]) for k,v in res.items() if np.abs(v[-1]-(1+2*v[2][0]))>1e-3]
    # Each list holds (key, v[1][i], v[2][i]) for entries where the two
    # values differ by more than the tolerance; they are printed below under
    # the headers 'distname', 'diststats', 'expect'.
    mean_ = [(k, v[1][0], v[2][0]) for k,v in res.items() if np.abs(v[1][0] - v[2][0])>1e-6 and np.isfinite(v[1][0])]
    var_ = [(k, v[1][1], v[2][1]) for k,v in res.items() if np.abs(v[1][1] - v[2][1])>1e-2 and np.isfinite(v[1][1])]
    # NOTE(review): the skew and kurt filters test np.isfinite on v[1][1]
    # (the value compared in var_), not on v[1][2] / v[1][3] -- possibly a
    # copy-paste slip; confirm.
    skew = [(k, v[1][2], v[2][2]) for k,v in res.items() if np.abs(v[1][2] - v[2][2])>1e-2 and np.isfinite(v[1][1])]
    kurt = [(k, v[1][3], v[2][3]) for k,v in res.items() if np.abs(v[1][3] - v[2][3])>1e-2 and np.isfinite(v[1][1])]

    from statsmodels.iolib import SimpleTable
    # NOTE(review): the original indentation is lost in this chunk; it is
    # assumed that only the mean table is guarded by the emptiness check.
    if len(mean_) > 0:
        print('\nMean difference at least 1e-6')
        print(SimpleTable(mean_, headers=['distname', 'diststats', 'expect']))
    print('\nVariance difference at least 1e-2')
    print(SimpleTable(var_, headers=['distname', 'diststats', 'expect']))
    print('\nSkew difference at least 1e-2')
    print(SimpleTable(skew, headers=['distname', 'diststats', 'expect']))
    print('\nKurtosis difference at least 1e-2')
    print(SimpleTable(kurt, headers=['distname', 'diststats', 'expect']))
def summary_old(self, yname=None, xname=None,
                title='Generalized linear model', returns='text'):
    """
    Summarize the Generalized linear model results as text or tables.

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable.  Default is 'Y'.
    xname : list of str, optional
        Names of the regressors.  Default is 'x0', 'x1', ..., one per
        column of ``self.model.exog``.
    title : str, optional
        Title of the first table.  Default is 'Generalized linear model'.
    returns : str, optional
        One of 'text' (default), 'print', 'tables' (alias 'table'), 'csv',
        'latex', 'html'.

    Returns
    -------
    str, list or None
        * 'text'   : the summary as a string.
        * 'print'  : prints the summary, returns None.
        * 'tables' : ``[table_1l, table_1r, table_2]`` as SimpleTable
          instances (left header, right header, coefficient table).
        * 'csv'    : the header table and the coefficient table as csv text.
        * 'latex', 'html' : not implemented; prints a notice, returns None.

    Raises
    ------
    ValueError
        If `returns` is not one of the options above.

    Notes
    -----
    Standard errors are taken from ``self.bse`` and the confidence
    intervals from ``self.conf_int()``.
    """
    import time as Time
    from statsmodels.iolib import SimpleTable

    # Only fall back to the defaults when the caller gave no names.
    if yname is None:
        yname = 'Y'
    if xname is None:
        xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

    now = Time.localtime()
    dist_family = self.model.family.__class__.__name__
    params = self.params
    stand_errors = self.bse
    conf_int = self.conf_int()
    tvalues = self.tvalues

    # Formatting options shared by all three tables.
    common_fmt = dict(
        empty_cell='',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )
    table_1l_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        colwidths=15,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
    )
    # Note: table_1l_fmt may override the format below in extend_right. (JP)
    table_1r_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s", "%1s"],
        colwidths=12,
        colsep=' ',
        row_pre='',
        row_post='',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
    )
    table_2_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s"],
        colwidths=13,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
    )

    # ---- summary table 1: model information (left and right halves) ----
    table_1l_stubs = ('Model Family:', 'Method:', 'Dependent Variable:',
                      'Date:', 'Time:')
    table_1l_data = [
        [dist_family],
        ['IRLS'],
        [yname],
        [Time.strftime("%a, %d %b %Y", now)],
        [Time.strftime("%H:%M:%S", now)],
    ]
    table_1l = SimpleTable(table_1l_data, None, table_1l_stubs,
                           title=title, txt_fmt=table_1l_fmt)

    table_1r_stubs = ('# of obs:', 'Df residuals:', 'Df model:', 'Scale:',
                      'Log likelihood:')
    table_1r_data = [
        [self.nobs],
        [self.df_resid],
        [self.df_model],
        ["%#6.4f" % (self.scale,)],
        ["%#6.4f" % (self.llf,)],
    ]
    table_1r = SimpleTable(table_1r_data, None, table_1r_stubs,
                           title=None, txt_fmt=table_1r_fmt)

    # ---- summary table 2: coefficients ----
    # TODO add % range to confidence interval column header
    table_2header = ('coefficient', 'stand errors', 't-statistic',
                     'Conf. Interval')
    # Built as a list (not a lazy zip) so the table receives a sized
    # sequence; rows are indexed positionally, one per entry of xname.
    table_2data = [
        ("%#6.4f" % (params[i]),
         "%#6.4f" % stand_errors[i],
         "%#6.4f" % (tvalues[i]),
         """ [%#6.3f, %#6.3f]""" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    table_2 = SimpleTable(table_2data, table_2header, xname,
                          title=None, txt_fmt=table_2_fmt)

    # ---- return the requested representation ----
    if returns in ('tables', 'table'):
        return [table_1l, table_1r, table_2]
    if returns in ('latex', 'html'):
        print('not avalible yet')
        return None
    if returns in ('text', 'print', 'csv'):
        # Join the two halves of the header table before rendering.
        table_1l.extend_right(table_1r)
        if returns == 'csv':
            return table_1l.as_csv() + '\n' + table_2.as_csv()
        text = str(table_1l) + '\n' + str(table_2)
        if returns == 'print':
            print(text)
            return None
        return text
    raise ValueError("unknown value for `returns`: %r" % (returns,))
def ols_high_d_category_multi_results(data_df, models, table_header):
    """
    Fit several model specifications on one dataframe and tabulate them.

    Each specification is fitted with ``ols_high_d_category``; the results
    are laid out side by side, one column per model, with three rows per
    coefficient (estimate, p-value, standard error).  A coefficient that is
    absent from a model is shown as ``'Nan'``.

    :param data_df: Dataframe with relevant data
    :param models: List of dicts, each with the keys ``'consist_input'``,
        ``'out_input'``, ``'category_input'`` and ``'cluster_input'``
    :param table_header: Column headers of the summary table (one per model)
    :return: the summary ``SimpleTable`` (it is also printed)
    """
    results = [
        ols_high_d_category(data_df,
                            spec['consist_input'],
                            spec['out_input'],
                            spec['category_input'],
                            spec['cluster_input'],
                            formula=None,
                            robust=False,
                            c_method='cgm',
                            epsilon=1e-5,
                            max_iter=1e6)
        for spec in models
    ]

    names_per_model = [result.params.index.to_list() for result in results]
    # Union of all coefficient names, keeping first-seen order.
    all_names = list(dict.fromkeys(
        name for names in names_per_model for name in names))

    # Three stub rows per coefficient: its name, then 'pvalue', 'std err'.
    index_name = []
    for name in all_names:
        index_name.extend((name, 'pvalue', 'std err'))

    table_content = []
    for name in all_names:
        coeff_list = []
        pvalue_list = []
        std_list = []
        for result, names in zip(results, names_per_model):
            if name in names:
                coeff_list.append("%#7.4g" % (result.params[name]))
                pvalue_list.append("%#8.2g" % (result.pvalues[name]))
                # bse is looked up by position, as in the original code.
                std_list.append("%#8.2f" % (result.bse[names.index(name)]))
            else:
                coeff_list.append('Nan')
                pvalue_list.append('Nan')
                std_list.append('Nan')
        table_content.append(tuple(coeff_list))
        table_content.append(tuple(pvalue_list))
        table_content.append(tuple(std_list))

    table_fmt = dict(
        fmt='txt',
        # basic table formatting
        table_dec_above='=',
        table_dec_below='-',
        title_align='l',
        # basic row formatting
        row_pre='',
        row_post='',
        header_dec_below='-',
        row_dec_below=None,
        colwidths=None,
        colsep=' ',
        data_aligns="l",
        # data formats
        data_fmts=["%s"],
        # labeled alignments
        stub_align='l',
        header_align='r',
        # labeled formats
        header_fmt='%s',
        stub_fmt='%s',
        header='%s',
        stub='%s',
        empty_cell='',
        empty='',
        missing='--',
    )
    table = SimpleTable(table_content,
                        table_header,
                        index_name,
                        title='multi',
                        txt_fmt=table_fmt)
    print(table)
    # Returned as well so callers can post-process it, as documented.
    return table
def summary_old(self, yname=None, xname=None,
                title='Generalized linear model', returns='text'):
    """
    Summarize the Generalized linear model results as text or tables.

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable.  Default is 'Y'.
    xname : list of str, optional
        Names of the regressors.  Default is 'x0', 'x1', ..., one per
        column of ``self.model.exog``.
    title : str, optional
        Title of the first table.  Default is 'Generalized linear model'.
    returns : str, optional
        One of 'text' (default), 'print', 'tables' (alias 'table'), 'csv',
        'latex', 'html'.

    Returns
    -------
    str, list or None
        * 'text'   : the summary as a string.
        * 'print'  : prints the summary, returns None.
        * 'tables' : ``[table_1l, table_1r, table_2]`` as SimpleTable
          instances (left header, right header, coefficient table).
        * 'csv'    : the header table and the coefficient table as csv text.
        * 'latex', 'html' : not implemented; prints a notice, returns None.

    Raises
    ------
    ValueError
        If `returns` is not one of the options above.

    Notes
    -----
    Standard errors are taken from ``self.bse`` and the confidence
    intervals from ``self.conf_int()``.
    """
    import time as Time
    from statsmodels.iolib import SimpleTable

    # Only fall back to the defaults when the caller gave no names.
    if yname is None:
        yname = 'Y'
    if xname is None:
        xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

    now = Time.localtime()
    dist_family = self.model.family.__class__.__name__
    params = self.params
    stand_errors = self.bse
    conf_int = self.conf_int()
    tvalues = self.tvalues

    # Formatting options shared by all three tables.
    common_fmt = dict(
        empty_cell='',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )
    table_1l_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        colwidths=15,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
    )
    # Note: table_1l_fmt may override the format below in extend_right. (JP)
    table_1r_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s", "%1s"],
        colwidths=12,
        colsep=' ',
        row_pre='',
        row_post='',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
    )
    table_2_fmt = dict(
        common_fmt,
        data_fmts=["%s", "%s", "%s", "%s"],
        colwidths=13,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
    )

    # ---- summary table 1: model information (left and right halves) ----
    table_1l_stubs = ('Model Family:', 'Method:', 'Dependent Variable:',
                      'Date:', 'Time:')
    table_1l_data = [
        [dist_family],
        ['IRLS'],
        [yname],
        [Time.strftime("%a, %d %b %Y", now)],
        [Time.strftime("%H:%M:%S", now)],
    ]
    table_1l = SimpleTable(table_1l_data, None, table_1l_stubs,
                           title=title, txt_fmt=table_1l_fmt)

    table_1r_stubs = ('# of obs:', 'Df residuals:', 'Df model:', 'Scale:',
                      'Log likelihood:')
    table_1r_data = [
        [self.nobs],
        [self.df_resid],
        [self.df_model],
        ["%#6.4f" % (self.scale, )],
        ["%#6.4f" % (self.llf, )],
    ]
    table_1r = SimpleTable(table_1r_data, None, table_1r_stubs,
                           title=None, txt_fmt=table_1r_fmt)

    # ---- summary table 2: coefficients ----
    # TODO add % range to confidence interval column header
    table_2header = ('coefficient', 'stand errors', 't-statistic',
                     'Conf. Interval')
    # Built as a list (not a lazy zip) so the table receives a sized
    # sequence; rows are indexed positionally, one per entry of xname.
    table_2data = [
        ("%#6.4f" % (params[i]),
         "%#6.4f" % stand_errors[i],
         "%#6.4f" % (tvalues[i]),
         """ [%#6.3f, %#6.3f]""" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    table_2 = SimpleTable(table_2data, table_2header, xname,
                          title=None, txt_fmt=table_2_fmt)

    # ---- return the requested representation ----
    if returns in ('tables', 'table'):
        return [table_1l, table_1r, table_2]
    if returns in ('latex', 'html'):
        print('not avalible yet')
        return None
    if returns in ('text', 'print', 'csv'):
        # Join the two halves of the header table before rendering.
        table_1l.extend_right(table_1r)
        if returns == 'csv':
            return table_1l.as_csv() + '\n' + table_2.as_csv()
        text = str(table_1l) + '\n' + str(table_2)
        if returns == 'print':
            print(text)
            return None
        return text
    raise ValueError("unknown value for `returns`: %r" % (returns, ))
def _single_summary(self, data, title=None):
    """Build a one-column table from a ``{row name: value}`` dict.

    Parameters
    ----------
    data : dict
        Row name -> row data.  A ``':'`` is appended to every row name to
        form the stub.
    title : optional
        Passed through to ``SimpleTable``.

    Returns
    -------
    SimpleTable

    Raises
    ------
    ValueError
        If `data` is not a ``dict``.
    """
    if isinstance(data, dict):
        labels, values = zip(*data.items())
        stubs = [label + ':' for label in labels]
        return SimpleTable(values, None, stubs, title)
    raise ValueError(f"`data` arg ({type(data)}) must be type `dict`")