def test_SimpleTable_2(self):
    """Test SimpleTable.extend_right().

    Builds two tables of two data rows and two data columns each, with
    stubs and headers, appends the second table to the right of the first,
    and compares the text rendering of the merged table with the expected
    string.
    """
    # NOTE(review): the expected text is whitespace-sensitive.  The column
    # padding below is laid out so that every row is as wide as the rule
    # lines; confirm it matches what default_txt_fmt actually renders.
    desired = '''
=============================================================
           header s1 header d1            header s2 header d2
-------------------------------------------------------------
stub R1 C1  10.30312  10.73999 stub R1 C2  50.95038  50.65765
stub R2 C1  90.30312  90.73999 stub R2 C2  40.95038  40.65765
-------------------------------------------------------------
'''
    data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
    data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
    stubs1 = ['stub R1 C1', 'stub R2 C1']
    stubs2 = ['stub R1 C2', 'stub R2 C2']
    header1 = ['header s1', 'header d1']
    header2 = ['header s2', 'header d2']
    actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
    actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
    # The merged result is read back from actual1, not from a return value.
    actual1.extend_right(actual2)
    # Wrap in newlines so the comparison lines up with the literal above,
    # which starts and ends with a line break.
    actual = '\n%s\n' % actual1.as_text()
    self.assertEqual(desired, str(actual))
def summary(self, yname=None, xname=None, returns='text'):
    """Return a summary of the regression results.

    Parameters
    ----------
    yname : string, optional
        Label for the dependent variable.  Default is
        `self.model.endog_names`.
    xname : list of strings, optional
        Labels for the regressors; one coefficient row is produced per
        entry.  Default is `self.model.exog_names`.
    returns : string, optional
        Selects the output format:

        - 'text' (default) : the three tables joined into one string
        - 'tables' : list with the three SimpleTable instances
        - 'csv' : the three tables joined into one csv string
        - 'latex', 'html' : not available yet; a notice is printed and
          None is returned

        Any other value returns None.

    Returns
    -------
    String, list of SimpleTable instances, or None, depending on
    `returns`.

    Examples
    --------
    >>> import scikits.statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    All residual statistics are calculated on whitened residuals.

    As a side effect the coefficient table is stored in `self.summary2`.
    """
    import time
    from scikits.statsmodels.iolib.table import SimpleTable
    from scikits.statsmodels.stats.stattools import (jarque_bera,
                                                     omni_normtest,
                                                     durbin_watson)

    if yname is None:
        yname = self.model.endog_names
    if xname is None:
        xname = self.model.exog_names

    modeltype = self.model.__class__.__name__

    llf, aic, bic = self.llf, self.aic, self.bic
    JB, JBpv, skew, kurtosis = jarque_bera(self.wresid)
    omni, omnipv = omni_normtest(self.wresid)

    t = time.localtime()

    # Text layout of the three parts.
    part1_fmt = dict(
        data_fmts=["%s"],
        empty_cell='',
        colwidths=15,
        colsep=' ',
        row_pre='| ',
        row_post='|',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt'
    )
    part2_fmt = dict(
        data_fmts=["%#10.4g", "%#10.4g", "%#6.4f", "%#6.4f"],
        empty_cell='',
        colwidths=14,
        colsep=' ',
        row_pre='| ',
        row_post=' |',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt'
    )
    part3_fmt = dict(
        data_fmts=["%#10.4g", "%#10.4g", "%#10.4g", "%#6.4g"],
        empty_cell='',
        colwidths=15,
        colsep=' ',
        row_pre='| ',
        row_post=' |',
        table_dec_above=None,
        table_dec_below='-',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt'
    )

    ######## summary Part 1: general information #######
    part1data = [[yname],
                 [modeltype],
                 ['Least Squares'],
                 [time.strftime("%a, %d %b %Y", t)],
                 [time.strftime("%H:%M:%S", t)],
                 [self.nobs],
                 [self.df_resid],
                 [self.df_model]]
    part1header = None
    part1title = 'Summary of Regression Results'
    part1stubs = ('Dependent Variable:',
                  'Model:',
                  'Method:',
                  'Date:',
                  'Time:',
                  '# obs:',
                  'Df residuals:',
                  'Df model:')
    part1 = SimpleTable(part1data, part1header, part1stubs,
                        title=part1title, txt_fmt=part1_fmt)

    ######## summary Part 2: coefficients #######
    # Read each result attribute once, then build a concrete list of rows;
    # a bare zip() would be a one-shot iterator on Python 3.
    params, bse = self.params, self.bse
    tvalues, pvalues = self.tvalues, self.pvalues
    part2data = [(params[i], bse[i], tvalues[i], pvalues[i])
                 for i in range(len(xname))]
    part2header = ('coefficient', 'std. error', 't-statistic', 'prob.')
    part2stubs = xname
    part2 = SimpleTable(part2data, part2header, part2stubs,
                        title=None, txt_fmt=part2_fmt)
    # Kept for callers that read the coefficient table from the instance.
    self.summary2 = part2

    ######## summary Part 3: model and residual statistics #######
    part3Lheader = ['Models stats']
    part3Rheader = ['Residual stats']
    part3Lstubs = ('R-squared:',
                   'Adjusted R-squared:',
                   'F-statistic:',
                   'Prob (F-statistic):',
                   'Log likelihood:',
                   'AIC criterion:',
                   'BIC criterion:',)
    part3Rstubs = ('Durbin-Watson:',
                   'Omnibus:',
                   'Prob(Omnibus):',
                   'JB:',
                   'Prob(JB):',
                   'Skew:',
                   'Kurtosis:')
    part3Ldata = [[self.rsquared], [self.rsquared_adj], [self.fvalue],
                  [self.f_pvalue], [llf], [aic], [bic]]
    part3Rdata = [[durbin_watson(self.wresid)], [omni], [omnipv], [JB],
                  [JBpv], [skew], [kurtosis]]
    part3L = SimpleTable(part3Ldata, part3Lheader, part3Lstubs,
                         txt_fmt=part3_fmt)
    part3R = SimpleTable(part3Rdata, part3Rheader, part3Rstubs,
                         txt_fmt=part3_fmt)
    # The residual statistics are appended to the right of the model
    # statistics; part3L holds the combined table afterwards.
    part3L.extend_right(part3R)

    ######## Return Summary Tables ########
    if returns == 'text':
        return str(part1) + '\n' + str(part2) + '\n' + str(part3L)
    elif returns == 'tables':
        return [part1, part2, part3L]
    elif returns == 'csv':
        return (part1.as_csv() + '\n' + part2.as_csv() + '\n' +
                part3L.as_csv())
    elif returns == 'latex':
        print('not available yet')
    elif returns == 'html':
        print('not available yet')
    return None
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    '''Generate the top table of a results summary.

    Parameters
    ----------
    results : results instance
        Must provide `model`; the default rows additionally read `nobs`,
        `df_model`, `df_resid` and, when available, `llf`.
    title : string, optional
        Table title.  Default is the model class name followed by
        ' Regression Results'.
    gleft, gright : list of (label, value) tuples, optional
        Rows of the left and right halves of the table.  A value of None
        is replaced by the default for that label; a label without a
        default raises KeyError.  If `gleft` is None a default set of rows
        is used; if `gright` is None the right half is omitted.
    yname, xname : optional
        Passed on to `_getnames` to resolve the variable names.

    Returns
    -------
    SimpleTable holding the left rows, extended on the right by the right
    rows when there are any.

    TODO: this still uses predefined model_methods ?
    allow gleft, gright to be 1 element tuples instead of filling with None?
    '''
    # time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are lambdas because some values raise an exception if they
    # are not available; they are only evaluated for rows actually used.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: ["%#6d" % results.nobs],
        # TODO: check when we have non-integer df
        'Df Model:': lambda: ["%#6d" % results.df_model],
        'Df Residuals:': lambda: ["%#6d" % results.df_resid],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # default: general part of the summary table; every label here
        # must have an entry in default_items
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:
            # best effort: models without a log-likelihood skip the row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
    else:
        gen_left = list(gleft)
    # Copy so that padding below never touches the caller's list; a
    # missing right half is an empty list.
    gen_right = list(gright) if gright is not None else []

    gen_title = title
    gen_header = None

    def _fill_defaults(rows):
        # replace missing (None) values with default values;
        # let KeyErrors raise exception
        filled = []
        for item, value in rows:
            if value is None:
                value = default_items[item]()
            filled.append((item, value))
        return filled

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    # check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    if gen_right:
        # pad both halves with blank lines to an equal number of rows
        if len(gen_right) < len(gen_left):
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            # just to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work like I want
        # force extra spacing and exact string length in right table
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        # transpose rows and columns
        gen_stubs_right, gen_data_right = zip(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems works with []

    # built after the right half so that the left half can be padded to
    # match its length; transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Summarize the estimation results as text.

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable.  Default is
        `self.model.endog_names`, or 'y' if the model has no such
        attribute.
    xname : list of strings, optional
        Names of the regressors.  Default is `self.model.exog_names`, or
        'var_0', 'var_1', ... if the model has no such attribute.
    title : string, optional
        Title of the general table.  The default 0 selects a description
        based on the model class name; unlisted classes use the class name
        itself.
    alpha : float, optional
        Passed to `self.conf_int`; the interval column is labelled with
        (1 - alpha) * 100 percent.
    returns : string, optional
        'text' (default) or 'print' return the summary as a string.  Other
        output formats are not implemented and return None.
    model_info : optional
        Currently unused.

    Returns
    -------
    String with the general table followed by the parameter table, or
    None for an unsupported `returns` value.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...
    """
    import time

    #TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weigthed least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model'}

    modeltype = self.model.__class__.__name__
    if title == 0:
        # fall back to the class name instead of raising for unlisted models
        title = model_types.get(modeltype, modeltype)

    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                # NOTE(review): yname is passed bare, not wrapped in a list
                # like the other rows -- confirm how SimpleTable renders it
                ('Dependent Variable:', yname),
                ('df model', [df_model])]
    # transpose (label, value) rows into a stub column and a data column
    gen_stubs_left, gen_data_left = zip(*gen_left)

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype],  # was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid])
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    # The statistic names are standardized across models, so the values
    # are read once; only the column label differs (z for RLM, else t).
    tstat = self.t()
    prob_stat = self.pvalues

    alp = str((1 - alpha) * 100) + '%'
    stat_label = 'z' if modeltype == 'RLM' else 't'  # checke z
    param_header = ['coef', 'std err', stat_label,
                    'P>|%s|' % stat_label, alp + ' Conf. Interval']

    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse

    # Simpletable should be able to handle the formating
    # Built as a concrete list; a bare zip() would be a one-shot iterator
    # on Python 3.
    params_data = [("%#6.4g" % (params[i]),
                    "%#6.4f" % (std_err[i]),
                    "%#6.4f" % (tstat[i]),
                    "%#6.4f" % (prob_stat[i]),
                    "(%#5g, %#5g)" % tuple(conf_int[i]))
                   for i in range(len(xname))]
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model
    # 'text' is the declared default and 'print' the historical spelling;
    # both return the joined tables.
    if returns in ('text', 'print'):
        return str(general_table) + '\n' + str(parameter_table)
    return None
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    '''Generate the top table of a results summary.

    Parameters
    ----------
    results : results instance
        Must provide `model`; the default rows additionally read `nobs`,
        `df_model`, `df_resid` and, when available, `llf`.
    title : string, optional
        Table title.  Default is the model class name followed by
        ' Regression Results'.
    gleft, gright : list of (label, value) tuples, optional
        Rows of the left and right halves of the table.  A value of None
        is replaced by the default for that label; a label without a
        default raises KeyError.  If `gleft` is None a default set of rows
        is used; if `gright` is None the right half is omitted.
    yname, xname : optional
        Passed on to `_getnames` to resolve the variable names.

    Returns
    -------
    SimpleTable holding the left rows, extended on the right by the right
    rows when there are any.

    TODO: this still uses predefined model_methods ?
    allow gleft, gright to be 1 element tuples instead of filling with None?
    '''
    # time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are lambdas because some values raise an exception if they
    # are not available; they are only evaluated for rows actually used.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: ["%#6d" % results.nobs],
        # TODO: check when we have non-integer df
        'Df Model:': lambda: ["%#6d" % results.df_model],
        'Df Residuals:': lambda: ["%#6d" % results.df_resid],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # default: general part of the summary table; every label here
        # must have an entry in default_items
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:
            # best effort: models without a log-likelihood skip the row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
    else:
        gen_left = list(gleft)
    # Copy so that padding below never touches the caller's list; a
    # missing right half is an empty list.
    gen_right = list(gright) if gright is not None else []

    gen_title = title
    gen_header = None

    def _fill_defaults(rows):
        # replace missing (None) values with default values;
        # let KeyErrors raise exception
        filled = []
        for item, value in rows:
            if value is None:
                value = default_items[item]()
            filled.append((item, value))
        return filled

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    # check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    if gen_right:
        # pad both halves with blank lines to an equal number of rows
        if len(gen_right) < len(gen_left):
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            # just to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work like I want
        # force extra spacing and exact string length in right table
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        # transpose rows and columns
        gen_stubs_right, gen_data_right = zip(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems works with []

    # built after the right half so that the left half can be padded to
    # match its length; transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Summarize the estimation results as text.

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable.  Default is
        `self.model.endog_names`, or 'y' if the model has no such
        attribute.
    xname : list of strings, optional
        Names of the regressors.  Default is `self.model.exog_names`, or
        'var_0', 'var_1', ... if the model has no such attribute.
    title : string, optional
        Title of the general table.  The default 0 selects a description
        based on the model class name; unlisted classes use the class name
        itself.
    alpha : float, optional
        Passed to `self.conf_int`; the interval column is labelled with
        (1 - alpha) * 100 percent.
    returns : string, optional
        'text' (default) or 'print' return the summary as a string.  Other
        output formats are not implemented and return None.
    model_info : optional
        Currently unused.

    Returns
    -------
    String with the general table followed by the parameter table, or
    None for an unsupported `returns` value.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...
    """
    import time

    #TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weigthed least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model'}

    modeltype = self.model.__class__.__name__
    if title == 0:
        # fall back to the class name instead of raising for unlisted models
        title = model_types.get(modeltype, modeltype)

    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                # NOTE(review): yname is passed bare, not wrapped in a list
                # like the other rows -- confirm how SimpleTable renders it
                ('Dependent Variable:', yname),
                ('df model', [df_model])]
    # transpose (label, value) rows into a stub column and a data column
    gen_stubs_left, gen_data_left = zip(*gen_left)

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype],  # was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid])
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    # The statistic names are standardized across models, so the values
    # are read once; only the column label differs (z for RLM, else t).
    tstat = self.t()
    prob_stat = self.pvalues

    alp = str((1 - alpha) * 100) + '%'
    stat_label = 'z' if modeltype == 'RLM' else 't'  # checke z
    param_header = ['coef', 'std err', stat_label,
                    'P>|%s|' % stat_label, alp + ' Conf. Interval']

    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse

    # Simpletable should be able to handle the formating
    # Built as a concrete list; a bare zip() would be a one-shot iterator
    # on Python 3.
    params_data = [("%#6.4g" % (params[i]),
                    "%#6.4f" % (std_err[i]),
                    "%#6.4f" % (tstat[i]),
                    "%#6.4f" % (prob_stat[i]),
                    "(%#5g, %#5g)" % tuple(conf_int[i]))
                   for i in range(len(xname))]
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model
    # 'text' is the declared default and 'print' the historical spelling;
    # both return the joined tables.
    if returns in ('text', 'print'):
        return str(general_table) + '\n' + str(parameter_table)
    return None
def summary_old(self, yname=None, xname=None,
                title='Generalized linear model', returns='text'):
    """Summarize the Generalized linear model results.

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable.  Default is 'Y'.
    xname : list of strings, optional
        Names of the regressors.  Default is 'x0', 'x1', ..., one per
        column of `self.model.exog`.
    title : string, optional
        Title of the first table.  Default is 'Generalized linear model'.
    returns : string, optional
        Selects the output format:

        - 'text' (default) : returns the summary as a string
        - 'print' : prints the summary and returns None
        - 'tables' : returns the list [left general table, right general
          table, parameter table] of SimpleTable instances
        - 'csv' : returns the tables as one csv string
        - 'latex', 'html' : not implemented yet; a notice is printed and
          None is returned

        Any other value returns None.

    Returns
    -------
    String, list of SimpleTable instances, or None, depending on
    `returns`.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    The 'stand errors' column shows `self.bse`.
    """
    import time
    from scikits.statsmodels.iolib.table import SimpleTable

    # Only fill in defaults; names supplied by the caller are kept.
    if yname is None:
        yname = 'Y'
    if xname is None:
        xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

    # Results used in the summary.
    now = time.localtime()
    dist_family = self.model.family.__class__.__name__
    df_model = self.df_model
    df_resid = self.df_resid
    llf = self.llf
    nobs = self.nobs
    params = self.params
    scale = self.scale
    stand_errors = self.bse
    conf_int = self.conf_int()
    t = self.tvalues

    table_1l_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=15,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt'
    )
    # Note table_1l_fmt over rides the below formating. in extend_right? JP
    table_1r_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s", "%1s"],
        empty_cell='',
        colwidths=12,
        colsep=' ',
        row_pre='',
        row_post='',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt'
    )
    table_2_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=13,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt'
    )

    ######## summary table 1 #######
    table_1l_title = title
    table_1l_header = None
    table_1l_stubs = ('Model Family:',
                      'Method:',
                      'Dependent Variable:',
                      'Date:',
                      'Time:',
                      )
    table_1l_data = [
        [dist_family],
        ['IRLS'],
        [yname],
        [time.strftime("%a, %d %b %Y", now)],
        [time.strftime("%H:%M:%S", now)],
    ]
    table_1l = SimpleTable(table_1l_data,
                           table_1l_header,
                           table_1l_stubs,
                           title=table_1l_title,
                           txt_fmt=table_1l_fmt)

    table_1r_title = None
    table_1r_header = None
    table_1r_stubs = ('# of obs:',
                      'Df residuals:',
                      'Df model:',
                      'Scale:',
                      'Log likelihood:'
                      )
    table_1r_data = [
        [nobs],
        [df_resid],
        [df_model],
        ["%#6.4f" % (scale,)],
        ["%#6.4f" % (llf,)]
    ]
    table_1r = SimpleTable(table_1r_data,
                           table_1r_header,
                           table_1r_stubs,
                           title=table_1r_title,
                           txt_fmt=table_1r_fmt)

    ######## summary table 2 #######
    #TODO add % range to confidance interval column header
    table_2header = ('coefficient', 'stand errors', 't-statistic',
                     'Conf. Interval')
    table_2stubs = xname
    # Built as a concrete list; a bare zip() would be a one-shot iterator
    # on Python 3.
    table_2data = [("%#6.4f" % (params[i]),
                    "%#6.4f" % stand_errors[i],
                    "%#6.4f" % (t[i]),
                    """ [%#6.3f, %#6.3f]""" % tuple(conf_int[i]))
                   for i in range(len(xname))]
    table_2 = SimpleTable(table_2data,
                          table_2header,
                          table_2stubs,
                          title=None,
                          txt_fmt=table_2_fmt)

    ######## Return Summary Tables ########
    # join the tables, then return or print
    if returns == 'text':
        table_1l.extend_right(table_1r)
        return str(table_1l) + '\n' + str(table_2)
    elif returns == 'print':
        table_1l.extend_right(table_1r)
        print(str(table_1l) + '\n' + str(table_2))
    elif returns == 'tables':
        return [table_1l, table_1r, table_2]
    elif returns == 'csv':
        # join the two halves of the general table as for 'text', then
        # export the tables that actually exist in this function
        table_1l.extend_right(table_1r)
        return table_1l.as_csv() + '\n' + table_2.as_csv()
    elif returns == 'latex':
        print('not avalible yet')
    elif returns == 'html':
        print('not avalible yet')
    return None