def generate_table(left_col, right_col, table_title):
    """Build a side-by-side summary table from two columns of rows.

    Parameters
    ----------
    left_col : sequence of (label, value) pairs
        Rows of the left half. The labels become the stubs and the values
        the data cells of a ``SimpleTable``.
    right_col : sequence of (label, value) pairs, or any falsy value
        Rows of the right half. When falsy, no right half is built and an
        empty list is handed to ``extend_right`` instead.
    table_title : str
        Title passed to both ``SimpleTable`` halves.

    Returns
    -------
    SimpleTable
        The left table after ``extend_right`` has been called on it with
        the right table.

    Notes
    -----
    When a right column is given, the shorter column is padded with blank
    ``(' ', ' ')`` rows so both halves have the same number of rows. The
    padding is applied to copies: the sequences passed in by the caller are
    never modified.
    """
    # Do not use column headers
    col_headers = None
    blank_row = (' ', ' ')

    # Work on copies so the padding below never leaks into the caller's data
    left_rows = list(left_col)

    if right_col:
        right_rows = list(right_col)

        # Pad whichever side is shorter with blank rows
        shortfall = len(left_rows) - len(right_rows)
        if shortfall > 0:
            right_rows += [blank_row] * shortfall
        elif shortfall < 0:
            left_rows += [blank_row] * (-shortfall)

        # Force extra spacing and a fixed label width in the right half
        right_rows = [('%-21s' % (' ' + k), v) for k, v in right_rows]

        # Transpose rows into (stubs, data) and build the right table
        gen_stubs_right, gen_data_right = zip_longest(*right_rows)
        gen_table_right = SimpleTable(gen_data_right,
                                      col_headers,
                                      gen_stubs_right,
                                      title=table_title,
                                      txt_fmt=fmt_2cols)
    else:
        # No right half: extend_right is given an empty list instead
        gen_table_right = []

    # Transpose rows into (stubs, data) and build the left table
    gen_stubs_left, gen_data_left = zip_longest(*left_rows)
    gen_table_left = SimpleTable(gen_data_left,
                                 col_headers,
                                 gen_stubs_left,
                                 title=table_title,
                                 txt_fmt=fmt_2cols)

    # Merge the left and right tables to make a single table
    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
def insert_header_row(self, rownum, headers, dec_below='header_dec_below'):
    """Insert one or more header rows at position ``rownum``; return None.

    ``headers`` is a sequence of strings, one per column. A string may
    contain newlines to request a multiline header; in that case one table
    row is inserted per line, and columns with fewer lines are filled with
    empty strings.

    Only the bottom-most inserted row receives ``dec_below``; the rows above
    it get ``dec_below = None``.
    """
    lines_per_column = [text.split('\n') for text in headers]
    # Transpose: one tuple per header line, short columns padded with ''
    header_lines = list(zip_longest(*lines_per_column, fillvalue=''))
    # Insert the last line first: every later insert at the same index
    # pushes the earlier rows down, so the final order is top to bottom.
    is_bottom_line = True
    for line in reversed(header_lines):
        self.insert(rownum, line, datatype='header')
        self[rownum].dec_below = dec_below if is_bottom_line else None
        is_bottom_line = False
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Build a plain-text summary of the fitted model.

    Parameters
    ----------
    yname : string, optional
        Name of the dependent variable; resolved through ``_getnames``.
    xname : list of strings, optional
        Names of the regressors; resolved through ``_getnames``. When
        given, it must have one entry per model parameter.
    title : string, optional
        Title of the general table. The default ``0`` means: look the
        title up in ``_model_types`` by the model's class name.
    alpha : float
        Passed to ``self.conf_int`` and used to label the confidence
        interval column as ``str((1 - alpha) * 100) + '%'``.
    returns : string
        ``'text'`` (default) or ``'print'``: return the summary as a
        string. Any other value ('table', 'csv', 'latex', 'html') is not
        implemented and yields ``None``.
    model_info : optional
        Currently unused.

    Returns
    -------
    str or None
        The general table and the parameter table rendered with ``str``
        and joined by a newline, or ``None`` for an unimplemented
        ``returns`` option.

    Raises
    ------
    ValueError
        If ``xname`` is given and its length differs from the number of
        model parameters.
    KeyError
        If the model's class name is not one of OLS, GLS, GLSAR, WLS, GLM
        or RLM (or, with ``title=0``, is missing from ``_model_types``).

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    Confidence intervals are taken from ``self.conf_int(alpha)``.
    """
    modeltype = self.model.__class__.__name__
    if title == 0:
        title = _model_types[modeltype]
    if xname is not None and len(xname) != len(self.params):
        # GH 2298
        raise ValueError('User supplied xnames must have the same number of '
                         'entries as the number of model parameters '
                         '({0})'.format(len(self.params)))
    yname, xname = _getnames(self, yname, xname)

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    # General part of the summary table, applicable to all(?) models
    # ---------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    gen_left = [
        ('Model type:', [modeltype]),
        ('Date:', [date]),
        ('Dependent Variable:', yname),  # TODO: What happens with multiple names?
        ('df model', [self.df_model]),
    ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose row col
    gen_header = None
    general_table = SimpleTable(gen_data_left,
                                gen_header,
                                gen_stubs_left,
                                title=title,
                                txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:', 'Time:', 'Number of Obs:', 'df resid')
    gen_data_right = (
        [modeltype],  # was dist family, need to look at more
        time_of_day,
        [self.nobs],
        [self.df_resid],
    )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=title,
                                  txt_fmt=gen_fmt)
    general_table.extend_right(gen_table_right)

    # Parameters part of the summary table
    # ------------------------------------
    # Header names for the parameter table, looked up by model type. The
    # lookup also rejects unsupported model types with a KeyError.
    alp = str((1 - alpha) * 100) + '%'
    t_header = ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
    z_header = ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']
    param_header = {
        'OLS': t_header,
        'GLS': t_header,
        'GLSAR': t_header,
        'WLS': t_header,
        'GLM': t_header,  # glm uses t-distribution
        'RLM': z_header,  # TODO: check z
    }[modeltype]

    # Every supported model type uses the same statistics, so they are
    # computed once instead of once per dictionary entry.
    params = self.params
    std_err = self.bse
    tstat = self.t()
    prob_stat = self.pvalues
    conf_int = self.conf_int(alpha)

    # SimpleTable should be able to handle the formatting.
    # Index-based access is kept on purpose: iterating these containers
    # directly is not guaranteed to walk them row by row.
    params_data = [
        ("%#6.4g" % (params[i]),
         "%#6.4f" % (std_err[i]),
         "%#6.4f" % (tstat[i]),
         "%#6.4f" % (prob_stat[i]),
         "(%#5g, %#5g)" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  xname,
                                  title=None,
                                  txt_fmt=fmt_2)

    # special table
    # -------------
    # TODO: exists in linear_model, what about other models
    # residual diagnostics

    # output options
    # --------------
    # TODO: JP the rest needs to be fixed, similar to summary in linear_model
    if returns in ('print', 'text'):
        return str(general_table) + '\n' + str(parameter_table)
    # 'table', 'csv', 'latex' and 'html' are not implemented yet
    return None
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    """Generate the top table of a summary.

    Parameters
    ----------
    results : results instance
        Provides ``model``, ``nobs``, ``df_model``, ``df_resid`` and, if
        available, ``llf`` for the default entries.
    title : string, optional
        Table title. Defaults to the model's class name followed by
        ``' Regression Results'``.
    gleft, gright : list of (label, value) pairs, optional
        Rows of the left and right half. A ``value`` of None is replaced by
        the default for that label; recognised labels are
        'Dependent Variable:', 'Dep. Variable:', 'Model:', 'Date:',
        'Time:', 'Number of Obs:', 'No. Observations:', 'Df Model:',
        'Df Residuals:' and 'Log-Likelihood:'. When ``gleft`` is None a
        default left half is used and ``gright`` is ignored. A ``gright``
        of None means no right half.
    yname, xname : optional
        Variable names, resolved through ``_getnames``.

    Returns
    -------
    SimpleTable
        The left table after ``extend_right`` was called with the right
        table (or with an empty list when there is no right half).

    Raises
    ------
    KeyError
        If a row has value None and its label has no default.

    Notes
    -----
    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with
    None?
    """
    # time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are wrapped in lambdas because some values raise an exception
    # if they are not available.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: [d_or_f(results.nobs)],
        'Df Model:': lambda: [d_or_f(results.df_model)],
        'Df Residuals:': lambda: [d_or_f(results.df_resid)],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # default: General part of the summary table, applicable to all(?)
        # models. Every label here must be a key of default_items.
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:  # e.g. AttributeError, NotImplementedError
            # best effort: models without a log-likelihood skip the row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        # the default layout has no right half
        gen_right = []
    else:
        gen_left = gleft
        gen_right = gright if gright is not None else []

    gen_title = title
    gen_header = None

    def _fill_defaults(rows):
        # replace missing (None) values with default values;
        # let KeyErrors raise exception
        return [(item, default_items[item]() if value is None else value)
                for item, value in rows]

    # Filling builds new lists, so the padding below cannot touch the
    # caller's gleft / gright.
    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    # check nothing was missed
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    # pad both tables to equal number of rows
    if gen_right:
        blank_row = (' ', ' ')
        shortfall = len(gen_left) - len(gen_right)
        if shortfall > 0:
            # fill up with blank lines to same length
            gen_right += [blank_row] * shortfall
        elif shortfall < 0:
            # fill up with blank lines to same length, to keep it symmetric
            gen_left += [blank_row] * (-shortfall)

        # padding in SimpleTable doesn't work as wanted here, so force
        # extra spacing and exact string length in the right table
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)  # transpose
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems to work with []

    # Built after the right half so that gen_left can be padded to match the
    # length of gen_right first.
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Build a plain-text summary of the fitted model.

    Parameters
    ----------
    yname : string, optional
        Name of the dependent variable; resolved through ``_getnames``.
    xname : list of strings, optional
        Names of the regressors; resolved through ``_getnames``. When
        given, it must have one entry per model parameter.
    title : string, optional
        Title of the general table. The default ``0`` means: look the
        title up in ``_model_types`` by the model's class name.
    alpha : float
        Passed to ``self.conf_int`` and used to label the confidence
        interval column as ``str((1 - alpha) * 100) + '%'``.
    returns : string
        ``'text'`` (default) or ``'print'``: return the summary as a
        string. Any other value ('table', 'csv', 'latex', 'html') is not
        implemented and yields ``None``.
    model_info : optional
        Currently unused.

    Returns
    -------
    str or None
        The general table and the parameter table rendered with ``str``
        and joined by a newline, or ``None`` for an unimplemented
        ``returns`` option.

    Raises
    ------
    ValueError
        If ``xname`` is given and its length differs from the number of
        model parameters.
    KeyError
        If the model's class name is not one of OLS, GLS, GLSAR, WLS, GLM
        or RLM (or, with ``title=0``, is missing from ``_model_types``).

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    Confidence intervals are taken from ``self.conf_int(alpha)``.
    """
    modeltype = self.model.__class__.__name__
    if title == 0:
        title = _model_types[modeltype]
    if xname is not None and len(xname) != len(self.params):
        # Reject mismatched names up front instead of failing later while
        # the parameter table is being assembled.
        raise ValueError('User supplied xnames must have the same number of '
                         'entries as the number of model parameters '
                         '({0})'.format(len(self.params)))
    yname, xname = _getnames(self, yname, xname)

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    # General part of the summary table, applicable to all(?) models
    # ---------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    gen_left = [
        ('Model type:', [modeltype]),
        ('Date:', [date]),
        ('Dependent Variable:', yname),  # TODO: What happens with multiple names?
        ('df model', [self.df_model]),
    ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose row col
    gen_header = None
    general_table = SimpleTable(gen_data_left,
                                gen_header,
                                gen_stubs_left,
                                title=title,
                                txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:', 'Time:', 'Number of Obs:', 'df resid')
    gen_data_right = (
        [modeltype],  # was dist family, need to look at more
        time_of_day,
        [self.nobs],
        [self.df_resid],
    )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=title,
                                  txt_fmt=gen_fmt)
    general_table.extend_right(gen_table_right)

    # Parameters part of the summary table
    # ------------------------------------
    # Header names for the parameter table, looked up by model type. The
    # lookup also rejects unsupported model types with a KeyError.
    alp = str((1 - alpha) * 100) + '%'
    t_header = ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
    z_header = ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']
    param_header = {
        'OLS': t_header,
        'GLS': t_header,
        'GLSAR': t_header,
        'WLS': t_header,
        'GLM': t_header,  # glm uses t-distribution
        'RLM': z_header,  # TODO: check z
    }[modeltype]

    # Every supported model type uses the same statistics, so they are
    # computed once instead of once per dictionary entry.
    params = self.params
    std_err = self.bse
    tstat = self.t()
    prob_stat = self.pvalues
    conf_int = self.conf_int(alpha)

    # SimpleTable should be able to handle the formatting.
    # Index-based access is kept on purpose: iterating these containers
    # directly is not guaranteed to walk them row by row.
    params_data = [
        ("%#6.4g" % (params[i]),
         "%#6.4f" % (std_err[i]),
         "%#6.4f" % (tstat[i]),
         "%#6.4f" % (prob_stat[i]),
         "(%#5g, %#5g)" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  xname,
                                  title=None,
                                  txt_fmt=fmt_2)

    # special table
    # -------------
    # TODO: exists in linear_model, what about other models
    # residual diagnostics

    # output options
    # --------------
    # TODO: JP the rest needs to be fixed, similar to summary in linear_model
    if returns in ('print', 'text'):
        return str(general_table) + '\n' + str(parameter_table)
    # 'table', 'csv', 'latex' and 'html' are not implemented yet
    return None
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    """Generate the top table of a summary.

    Parameters
    ----------
    results : results instance
        Provides ``model``, ``nobs``, ``df_model``, ``df_resid`` and, if
        available, ``llf`` for the default entries.
    title : string, optional
        Table title. Defaults to the model's class name followed by
        ``' Regression Results'``.
    gleft, gright : list of (label, value) pairs, optional
        Rows of the left and right half. A ``value`` of None is replaced by
        the default for that label; recognised labels are
        'Dependent Variable:', 'Dep. Variable:', 'Model:', 'Date:',
        'Time:', 'Number of Obs:', 'No. Observations:', 'Df Model:',
        'Df Residuals:' and 'Log-Likelihood:'. When ``gleft`` is None a
        default left half is used and ``gright`` is ignored. A ``gright``
        of None means no right half.
    yname, xname : optional
        Variable names, resolved through ``_getnames``.

    Returns
    -------
    SimpleTable
        The left table after ``extend_right`` was called with the right
        table (or with an empty list when there is no right half).

    Raises
    ------
    KeyError
        If a row has value None and its label has no default.

    Notes
    -----
    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with
    None?
    """
    # time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are wrapped in lambdas because some values raise an exception
    # if they are not available.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: [d_or_f(results.nobs)],
        'Df Model:': lambda: [d_or_f(results.df_model)],
        'Df Residuals:': lambda: [d_or_f(results.df_resid)],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # default: General part of the summary table, applicable to all(?)
        # models. Every label here must be a key of default_items.
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:  # e.g. AttributeError, NotImplementedError
            # best effort: models without a log-likelihood skip the row
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        # the default layout has no right half
        gen_right = []
    else:
        gen_left = gleft
        gen_right = gright if gright is not None else []

    gen_title = title
    gen_header = None

    def _fill_defaults(rows):
        # replace missing (None) values with default values;
        # let KeyErrors raise exception
        return [(item, default_items[item]() if value is None else value)
                for item, value in rows]

    # Filling builds new lists, so the padding below cannot touch the
    # caller's gleft / gright.
    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    # check nothing was missed
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    # pad both tables to equal number of rows
    if gen_right:
        blank_row = (' ', ' ')
        shortfall = len(gen_left) - len(gen_right)
        if shortfall > 0:
            # fill up with blank lines to same length
            gen_right += [blank_row] * shortfall
        elif shortfall < 0:
            # fill up with blank lines to same length, to keep it symmetric
            gen_left += [blank_row] * (-shortfall)

        # padding in SimpleTable doesn't work as wanted here, so force
        # extra spacing and exact string length in the right table
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)  # transpose
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems to work with []

    # Built after the right half so that gen_left can be padded to match the
    # length of gen_right first.
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    return gen_table_left