def test_SimpleTable_2(self):
    """Check that ``SimpleTable.extend_right`` joins two tables side by side."""
    # NOTE(review): the column padding inside ``expected`` was restored by
    # hand from a whitespace-collapsed source -- confirm against real output.
    expected = '''
=============================================================
           header s1 header d1            header s2 header d2
-------------------------------------------------------------
stub R1 C1  10.30312  10.73999 stub R1 C2  50.95038  50.65765
stub R2 C1  90.30312  90.73999 stub R2 C2  40.95038  40.65765
-------------------------------------------------------------
'''
    left = SimpleTable(
        [[10.30312, 10.73999], [90.30312, 90.73999]],
        ['header s1', 'header d1'],
        ['stub R1 C1', 'stub R2 C1'],
        txt_fmt=default_txt_fmt,
    )
    right = SimpleTable(
        [[50.95038, 50.65765], [40.95038, 40.65765]],
        ['header s2', 'header d2'],
        ['stub R1 C2', 'stub R2 C2'],
        txt_fmt=default_txt_fmt,
    )
    # extend_right works in place on the left-hand table.
    left.extend_right(right)
    rendered = '\n%s\n' % left.as_text()
    self.assertEqual(expected, str(rendered))
def test_simple_table_2(self):
    """Check the text rendering of two tables merged with ``extend_right``."""
    # NOTE(review): the column padding inside ``expected`` was restored by
    # hand from a whitespace-collapsed source -- confirm against real output.
    expected = '''
=============================================================
           header s1 header d1            header s2 header d2
-------------------------------------------------------------
stub R1 C1  10.30312  10.73999 stub R1 C2  50.95038  50.65765
stub R2 C1  90.30312  90.73999 stub R2 C2  40.95038  40.65765
-------------------------------------------------------------
'''
    first_data = [[10.30312, 10.73999], [90.30312, 90.73999]]
    first_header = ['header s1', 'header d1']
    first_stubs = ['stub R1 C1', 'stub R2 C1']
    merged = SimpleTable(first_data, first_header, first_stubs,
                         txt_fmt=default_txt_fmt)

    second_data = [[50.95038, 50.65765], [40.95038, 40.65765]]
    second_header = ['header s2', 'header d2']
    second_stubs = ['stub R1 C2', 'stub R2 C2']
    addition = SimpleTable(second_data, second_header, second_stubs,
                           txt_fmt=default_txt_fmt)

    # The right-hand table is appended column-wise onto ``merged`` in place.
    merged.extend_right(addition)
    rendered = '\n%s\n' % merged.as_text()
    assert_equal(expected, str(rendered))
def generate_table(left_col, right_col, table_title):
    """Build a two-sided summary table from ``(label, value)`` rows.

    Parameters
    ----------
    left_col : sequence of (label, value) pairs
        Rows for the left half of the table. Must contain at least one row,
        otherwise unpacking the transposed rows raises ``ValueError``.
    right_col : sequence of (label, value) pairs
        Rows for the right half. When empty (or otherwise falsy) only the
        left half is produced.
    table_title : str
        Title passed to both ``SimpleTable`` halves.

    Returns
    -------
    SimpleTable
        The left table, extended on the right with the right table.

    Notes
    -----
    The inputs are copied before padding, so the caller's lists are never
    lengthened as a side effect of calling this function.
    """
    # Column headers are deliberately not used.
    col_headers = None
    blank_row = (' ', ' ')

    # Copy first: the blank-row padding below must not leak into the
    # caller's ``left_col`` / ``right_col`` objects.
    left_rows = list(left_col)
    right_rows = list(right_col) if right_col else []

    if right_rows:
        # Pad the shorter side with blank rows so both halves line up.
        n_rows = max(len(left_rows), len(right_rows))
        left_rows += [blank_row] * (n_rows - len(left_rows))
        right_rows += [blank_row] * (n_rows - len(right_rows))

        # Force a leading space and a fixed label width in the right half.
        right_rows = [('%-21s' % (' ' + k), v) for k, v in right_rows]

        # Transpose rows into (stubs, data).
        gen_stubs_right, gen_data_right = zip_longest(*right_rows)
        gen_table_right = SimpleTable(gen_data_right,
                                      col_headers,
                                      gen_stubs_right,
                                      title=table_title,
                                      txt_fmt=fmt_2cols)
    else:
        # No right half: extend_right is handed an empty list instead.
        gen_table_right = []

    gen_stubs_left, gen_data_left = zip_longest(*left_rows)
    gen_table_left = SimpleTable(gen_data_left,
                                 col_headers,
                                 gen_stubs_left,
                                 title=table_title,
                                 txt_fmt=fmt_2cols)

    # Merge the two halves into a single table.
    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
def _top_table(
    top_left: Sequence[Tuple[str, str]],
    top_right: Sequence[Tuple[str, str]],
    title: str,
) -> SimpleTable:
    """Create the two-sided header table shown at the top of a summary.

    Parameters
    ----------
    top_left : sequence of (label, value) pairs
        Rows of the left half; labels become stubs, values single-cell rows.
    top_right : sequence of (label, value) pairs
        Rows of the right half; each label is prefixed with a space and
        left-justified to 21 characters before use.
    title : str
        Title given to the left (and therefore the combined) table.

    Returns
    -------
    SimpleTable
        The left table after ``extend_right`` has appended the right table.
    """

    def _split(rows):
        # Separate (label, value) rows into stubs and one-cell data rows.
        labels = []
        cells = []
        for label, cell in rows:
            labels.append(label)
            cells.append([cell])
        return labels, cells

    left_stubs, left_vals = _split(top_left)
    table = SimpleTable(left_vals, txt_fmt=fmt_2cols, title=title,
                        stubs=left_stubs)

    # NOTE(review): if ``fmt_2cols`` is a plain dict, ``.copy()`` is shallow,
    # so this assignment also rewrites the shared ``data_fmts`` list; the
    # copy itself is never handed to a table. Kept as-is because rendering
    # may depend on the shared list being updated -- confirm intent.
    shallow_fmt = fmt_2cols.copy()
    shallow_fmt["data_fmts"][1] = "%18s"

    padded_right = [("%-21s" % (" " + k), v) for k, v in top_right]
    right_stubs, right_vals = _split(padded_right)
    table.extend_right(SimpleTable(right_vals, stubs=right_stubs))

    return table
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Build the general and parameter tables of a fitted model as text.

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable; missing names are resolved by
        ``_getnames``.
    xname : list[str], optional
        Names of the regressors. When given, it must have one entry per
        element of ``self.params``.
    title : str, optional
        Table title. The default ``0`` looks the title up in
        ``_model_types`` using the model's class name.
    alpha : float
        Passed to ``self.conf_int`` and used to label the interval column.
    returns : str
        Only ``'print'`` produces a return value (see Returns).
    model_info : optional
        Not used by this function.

    Returns
    -------
    str or None
        For ``returns='print'`` the general table and the parameter table
        joined by a newline; for every other value of ``returns`` -
        including the default ``'text'`` - ``None``.

    Raises
    ------
    ValueError
        If ``xname`` is given with a length different from ``self.params``.

    Notes
    -----
    NOTE(review): the default ``returns='text'`` falls through and returns
    ``None``; confirm whether that is intended.
    """
    if title == 0:
        title = _model_types[self.model.__class__.__name__]

    if xname is not None:
        n_names = len(xname)
        n_params = len(self.params)
        if n_names != n_params:
            # GH 2298
            raise ValueError('User supplied xnames must have the same number of '
                             'entries as the number of model parameters '
                             '({0})'.format(n_params))

    yname, xname = _getnames(self, yname, xname)

    now = time.localtime()
    clock = [time.strftime("%H:%M:%S", now)]
    today = time.strftime("%a, %d %b %Y", now)

    modeltype = self.model.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    # General part of the summary table
    # ---------------------------------
    # TODO: define this generically, overwrite in model classes
    left_rows = [
        ('Model type:', [modeltype]),
        ('Date:', [today]),
        ('Dependent Variable:', yname),  # TODO: What happens with multiple names?
        ('df model', [df_model]),
    ]
    left_stubs, left_data = zip_longest(*left_rows)  # transpose rows/cols
    general_table = SimpleTable(left_data, None, left_stubs,
                                title=title, txt_fmt=gen_fmt)

    right_rows = [
        ('Method:', [modeltype]),  # was dist family, needs another look
        ('Time:', clock),
        ('Number of Obs:', [nobs]),
        ('df resid', [df_resid]),
    ]
    right_stubs, right_data = zip_longest(*right_rows)
    right_table = SimpleTable(right_data, None, right_stubs,
                              title=title, txt_fmt=gen_fmt)
    general_table.extend_right(right_table)

    # Parameters part of the summary table
    # ------------------------------------
    # Note: the per-model lookups are not strictly necessary since the names
    # are standardized; only t versus normal differs.
    known_models = ('OLS', 'GLS', 'GLSAR', 'WLS', 'RLM', 'GLM')
    tstats = {name: self.t() for name in known_models}
    prob_stats = {name: self.pvalues for name in known_models}

    # Header names for the parameter table, looked up by model type.
    alp = str((1 - alpha) * 100) + '%'
    param_header = {
        name: ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
        for name in ('OLS', 'GLS', 'GLSAR', 'WLS', 'GLM')  # GLM uses t-dist
    }
    param_header['RLM'] = ['coef', 'std err', 'z', 'P>|z|',
                           alp + ' Conf. Interval']  # TODO: check z

    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = lrange(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # SimpleTable should be able to handle the formatting itself.
    coef_col = ["%#6.4g" % (params[i]) for i in exog_len]
    se_col = ["%#6.4f" % (std_err[i]) for i in exog_len]
    stat_col = ["%#6.4f" % (tstat[i]) for i in exog_len]
    prob_col = ["%#6.4f" % (prob_stat[i]) for i in exog_len]
    ci_col = ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len]
    params_data = lzip(coef_col, se_col, stat_col, prob_col, ci_col)

    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    # Output options
    # --------------
    # TODO: the rest needs to be fixed, similar to summary in linear_model
    def _render():
        # Text of both tables, general part first.
        return str(general_table) + '\n' + str(parameter_table)

    printers = {'OLS': _render, 'GLM': _render}

    if returns == 'print':
        try:
            return printers[modeltype]()
        except KeyError:
            # Unknown model types fall back to the OLS layout.
            return printers['OLS']()
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    """Generate the top (general information) table of a summary.

    Parameters
    ----------
    results : results instance
        Source of the default values; ``results.model``, ``results.nobs``,
        ``results.df_model``, ``results.df_resid`` and ``results.llf`` are
        read only for the labels that need them.
    title : str, optional
        Table title. Defaults to the model class name followed by
        ``' Regression Results'``.
    gleft, gright : list of (label, value) pairs, optional
        Rows for the left and right half. A value of ``None`` is replaced by
        the default registered for that label. When ``gleft`` is ``None`` a
        default left half is used and ``gright`` is ignored.
    yname, xname : optional
        Forwarded to ``_getnames``.

    Returns
    -------
    SimpleTable
        The left table, extended on the right when there are right rows.

    Raises
    ------
    KeyError
        If a row has value ``None`` and its label has no registered default.

    Notes
    -----
    ``gleft`` and ``gright`` are copied before padding, so the caller's
    lists are not modified.

    TODO: allow gleft, gright to be 1 element tuples instead of filling
    with None?
    """
    # Time and names are always included.
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are lambdas because some values raise if they are unavailable.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: [d_or_f(results.nobs)],
        'Df Model:': lambda: [d_or_f(results.df_model)],
        'Df Residuals:': lambda: [d_or_f(results.df_resid)],
        # does not exist for RLM - raises
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
    }

    if title is None:
        # The separating space was missing ("OLSRegression Results").
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # Default general part. Every label must be a key of default_items,
        # otherwise the default call fails with KeyError.
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:  # e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        gen_right = []
    else:
        gen_left = list(gleft)
        # A missing right half is treated as empty.
        gen_right = list(gright) if gright else []

    def _fill_defaults(rows):
        # Replace missing (None) values; unknown labels raise KeyError.
        return [(item, default_items[item]() if value is None else value)
                for item, value in rows]

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    gen_header = None
    blank_row = (' ', ' ')

    if gen_right:
        # Pad both halves with blank rows to the same number of rows.
        n_rows = max(len(gen_left), len(gen_right))
        gen_left += [blank_row] * (n_rows - len(gen_left))
        gen_right += [blank_row] * (n_rows - len(gen_right))

        # Padding in SimpleTable does not give the wanted layout: force
        # extra spacing and exact string length in the right table.
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # extend_right is handed an empty list

    # Built after padding so the left half matches the right half's length.
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Print the general and parameter tables of the fitted model.

    Parameters
    ----------
    yname : str or list of str, optional
        Dependent variable name(s); defaults to ``self.yname``. A single
        string (or ``str`` subclass) is treated as a one-element list.
    xname : str or list of str, optional
        Regressor name(s); defaults to ``self.xname``. A single string
        (or ``str`` subclass) is treated as a one-element list.
    title : str, optional
        Table title; the default ``0`` selects
        ``'High Dimensional Fixed Effect Regression Results'``.
    alpha : float
        Passed to ``self.conf_int``; ``alpha / 2`` and ``1 - alpha / 2``
        label the two interval columns.
    returns, model_info : optional
        Not used by this function.

    Returns
    -------
    None
        The tables are printed and stored on ``self.general_table`` and
        ``self.parameter_table``; ``self.std_err_name`` is set as well.

    Raises
    ------
    ValueError
        If ``xname`` / ``yname`` is given with a length different from
        ``self.xname`` / ``self.yname``.
    """
    # General part of the summary table
    if title == 0:
        title = 'High Dimensional Fixed Effect Regression Results'

    # isinstance (not an exact type comparison) so that str subclasses are
    # wrapped too instead of being measured character by character.
    if isinstance(xname, str):
        xname = [xname]
    if isinstance(yname, str):
        yname = [yname]

    if xname is not None and len(xname) != len(self.xname):
        # GH 2298
        raise ValueError(
            'User supplied xnames must have the same number of '
            'entries as the number of model parameters '
            '({0})'.format(len(self.xname)))
    if yname is not None and len(yname) != len(self.yname):
        raise ValueError(
            'User supplied ynames must have the same number of '
            'entries as the number of model dependent variables '
            '({0})'.format(len(self.yname)))

    if xname is None:
        xname = self.xname
    if yname is None:
        yname = self.yname

    nobs = int(self.nobs)
    df_model = self.df
    resid_std_err = forg(self.resid_std_err, 4)
    Covariance_Type = self.Covariance_Type
    cluster_method = self.cluster_method

    gen_left = [
        ('Dep. Variable:', yname),  # TODO: What happens with multiple names?
        ('No. Observations:', [nobs]),
        ('DoF of residual:', [df_model]),
        ('Residual std err:', [resid_std_err]),
        ('Covariance Type:', [Covariance_Type]),
        ('Cluster Method:', [cluster_method]),
    ]

    r_squared = forg(self.rsquared, 4)
    rsquared_adj = forg(self.rsquared_adj, 4)
    full_rsquared = forg(self.full_rsquared, 4)
    full_rsquared_adj = forg(self.full_rsquared_adj, 4)
    fvalue = forg(self.fvalue, 4)
    f_pvalue = forg(self.f_pvalue, 4)
    full_fvalue = forg(self.full_fvalue, 4)
    full_f_pvalue = forg(self.full_f_pvalue, 4)

    gen_right = [
        ('R-squared(proj model):', [r_squared]),
        ('Adj. R-squared(proj model):', [rsquared_adj]),
        ('R-squared(full model):', [full_rsquared]),
        ('Adj. R-squared(full model):', [full_rsquared_adj]),
        ('F-statistic(proj model):', [fvalue]),
        ('Prob (F-statistic (proj model)):', [f_pvalue]),
        ('DoF of F-test (proj model):', [self.f_df_proj]),
        ('F-statistic(full model):', [full_fvalue]),
        ('Prob (F-statistic (full model)):', [full_f_pvalue]),
        ('DoF of F-test (full model):', [self.f_df_full]),
    ]

    # Pad both halves with blank rows to the same number of rows.
    if len(gen_right) < len(gen_left):
        gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
    elif len(gen_right) > len(gen_left):
        # just to keep it symmetric
        gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

    gen_title = title
    gen_header = None

    gen_stubs_left, gen_data_left = zip_longest(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right, gen_data_right = zip_longest(*gen_right)
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)

    gen_table_left.extend_right(gen_table_right)
    self.general_table = gen_table_left

    # Parameters part of the summary table
    s_alp = alpha / 2
    c_alp = 1 - alpha / 2

    if Covariance_Type == 'nonrobust':
        self.std_err_name = 'nonrobust std err'
    elif Covariance_Type == 'robust':
        self.std_err_name = 'robust std err'
    elif Covariance_Type == 'clustered':
        self.std_err_name = 'cluster std err'
    else:
        self.std_err_name = 'std err'

    # The two trailing columns are the lower and upper interval bounds.
    param_header = [
        'coef', self.std_err_name, 't', 'P>|t|',
        '[' + str(s_alp), str(c_alp) + ']',
    ]

    params_stubs = xname
    params = self.params.copy()
    conf_int = self.conf_int(alpha)
    std_err = self.bse.copy()
    exog_len = lrange(len(xname))
    tstat = self.tvalues.copy()
    prob_stat = self.pvalues.copy()

    # Pass every statistic through forg before the fixed-width formatting.
    for i in range(len(self.params)):
        params[i] = forg(self.params[i], 5)
        std_err[i] = forg(self.bse[i], 5)
        tstat[i] = forg(self.tvalues[i], 4)
        prob_stat[i] = forg(self.pvalues[i], 4)

    # SimpleTable should be able to handle the formatting itself.
    params_data = lzip(["%#6.5f" % (params[i]) for i in exog_len],
                       ["%#6.5f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["%#6.4f" % conf_int[0][i] for i in exog_len],
                       ["%#6.4f" % conf_int[1][i] for i in exog_len])

    self.parameter_table = SimpleTable(params_data,
                                       param_header,
                                       params_stubs,
                                       title=None,
                                       txt_fmt=fmt_2)

    print(self.general_table)
    print(self.parameter_table)
    return
def summary(self):
    """
    Assemble a summary for a model evaluated at user-specified parameters.

    Returns
    -------
    summary : Summary instance
        Holds the header table followed by one coefficient table for each
        of the mean model, volatility model and distribution that has at
        least one parameter, plus a note that no standard errors exist.
    """
    # Summary layout
    # 1. Overall information
    # 2. Mean parameters
    # 3. Volatility parameters
    # 4. Distribution parameters
    # 5. Notes

    model = self.model
    model_name = model.name + ' - ' + model.volatility.name

    # Summary header
    header_left = [('Dep. Variable:', self._dep_name),
                   ('Mean Model:', model.name),
                   ('Vol Model:', model.volatility.name),
                   ('Distribution:', model.distribution.name),
                   ('Method:', 'User-specified Parameters'),
                   ('', ''),
                   ('Date:', self._datetime.strftime('%a, %b %d %Y')),
                   ('Time:', self._datetime.strftime('%H:%M:%S'))]

    header_right = [('R-squared:', '--'),
                    ('Adj. R-squared:', '--'),
                    ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                    ('AIC:', '%#10.6g' % self.aic),
                    ('BIC:', '%#10.6g' % self.bic),
                    ('No. Observations:', self._nobs),
                    ('', ''),
                    ('', '')]

    header_title = model_name + ' Model Results'
    left_stubs = [label for label, _ in header_left]
    left_vals = [[value] for _, value in header_left]
    table = SimpleTable(left_vals, txt_fmt=fmt_2cols, title=header_title,
                        stubs=left_stubs)

    # create summary table instance
    smry = Summary()

    # NOTE(review): this writes into the shared ``fmt_2cols`` object after
    # the left table was created with it -- confirm that is intended.
    fmt_2cols['data_fmts'][1] = '%18s'

    header_right = [('%-21s' % (' ' + k), v) for k, v in header_right]
    right_stubs = [label for label, _ in header_right]
    right_vals = [[value] for _, value in header_right]
    table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
    smry.tables.append(table)

    # Coefficient rows: only np.float64 cells go through the fixed formatter.
    stubs = self._names
    header = ['coef']
    vals = (self.params,)
    formats = [(10, 4)]
    param_table_data = []
    for pos in range(len(vals[0])):
        row = []
        for column, spec in zip(vals, formats):
            cell = column[pos]
            if isinstance(cell, np.float64):
                cell = format_float_fixed(cell, *spec)
            row.append(cell)
        param_table_data.append(row)

    counts = (self.model.num_params,
              self.model.volatility.num_params,
              self.model.distribution.num_params)
    sections = ('Mean Model', 'Volatility Model', 'Distribution')

    # Slice the flat parameter rows into consecutive per-component tables.
    start = 0
    for section, count in zip(sections, counts):
        if count == 0:
            continue
        stop = start + count
        smry.tables.append(SimpleTable(param_table_data[start:stop],
                                       stubs=stubs[start:stop],
                                       txt_fmt=fmt_params,
                                       headers=header,
                                       title=section))
        start = stop

    extra_text = ('Results generated with user-specified parameters.',
                  'Since the model was not estimated, there are no std. '
                  'errors.')
    smry.add_extra_txt(extra_text)
    return smry
def summary(self):
    """
    Assemble a summary of the results of an estimated model.

    Returns
    -------
    summary : Summary instance
        Holds the header table followed by one parameter table for each of
        the mean model, volatility model and distribution that has at least
        one parameter, plus notes on the covariance estimator and, when
        ``self.convergence_flag`` is truthy, a convergence warning.
    """
    # Summary layout
    # 1. Overall information
    # 2. Mean parameters
    # 3. Volatility parameters
    # 4. Distribution parameters
    # 5. Notes

    model = self.model
    model_name = model.name + ' - ' + model.volatility.name

    # Summary header
    header_left = [('Dep. Variable:', self._dep_name),
                   ('Mean Model:', model.name),
                   ('Vol Model:', model.volatility.name),
                   ('Distribution:', model.distribution.name),
                   ('Method:', 'Maximum Likelihood'),
                   ('', ''),
                   ('Date:', self._datetime.strftime('%a, %b %d %Y')),
                   ('Time:', self._datetime.strftime('%H:%M:%S'))]

    header_right = [('R-squared:', '%#8.3f' % self.rsquared),
                    ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj),
                    ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                    ('AIC:', '%#10.6g' % self.aic),
                    ('BIC:', '%#10.6g' % self.bic),
                    ('No. Observations:', self._nobs),
                    ('Df Residuals:', self.nobs - self.num_params),
                    ('Df Model:', self.num_params)]

    header_title = model_name + ' Model Results'
    left_stubs = [label for label, _ in header_left]
    left_vals = [[value] for _, value in header_left]
    table = SimpleTable(left_vals, txt_fmt=fmt_2cols, title=header_title,
                        stubs=left_stubs)

    # create summary table instance
    smry = Summary()

    # NOTE(review): this writes into the shared ``fmt_2cols`` object after
    # the left table was created with it -- confirm that is intended.
    fmt_2cols['data_fmts'][1] = '%18s'

    header_right = [('%-21s' % (' ' + k), v) for k, v in header_right]
    right_stubs = [label for label, _ in header_right]
    right_vals = [[value] for _, value in header_right]
    table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
    smry.tables.append(table)

    # One "[lower,upper]" string per parameter.
    conf_int = np.asarray(self.conf_int())
    conf_int_str = [
        '[' + format_float_fixed(bounds[0], 7, 3) + ','
        + format_float_fixed(bounds[1], 7, 3) + ']'
        for bounds in conf_int
    ]

    stubs = self._names
    header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.']
    vals = (self.params,
            self.std_err,
            self.tvalues,
            self.pvalues,
            conf_int_str)
    formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]

    # Only np.float64 cells are formatted; the interval strings pass through.
    param_table_data = []
    for pos in range(len(vals[0])):
        row = []
        for column, spec in zip(vals, formats):
            cell = column[pos]
            if isinstance(cell, np.float64):
                cell = format_float_fixed(cell, *spec)
            row.append(cell)
        param_table_data.append(row)

    counts = (self.model.num_params,
              self.model.volatility.num_params,
              self.model.distribution.num_params)
    sections = ('Mean Model', 'Volatility Model', 'Distribution')

    # Slice the flat parameter rows into consecutive per-component tables.
    start = 0
    for section, count in zip(sections, counts):
        if count == 0:
            continue
        stop = start + count
        smry.tables.append(SimpleTable(param_table_data[start:stop],
                                       stubs=stubs[start:stop],
                                       txt_fmt=fmt_params,
                                       headers=header,
                                       title=section))
        start = stop

    extra_text = ['Covariance estimator: ' + self.cov_type]

    if self.convergence_flag:
        # NOTE(review): line breaks inside this message were restored from a
        # whitespace-collapsed source -- confirm against the intended text.
        optimizer_message = self._optim_output[-1]
        extra_text.append("""
WARNING: The optimizer did not indicate sucessful convergence. The message was
{string_message}. See convergence_flag.""".format(
            string_message=optimizer_message))

    smry.add_extra_txt(extra_text)
    return smry
def summary_top(results, title=None, gleft=None, gright=None, yname=None,
                xname=None):
    '''Generate the top (general information) table of a summary.

    Parameters
    ----------
    results : results instance
        Source of the default values; ``results.model``, ``results.nobs``,
        ``results.df_model``, ``results.df_resid`` and ``results.llf`` are
        read only for the labels that need them.
    title : str, optional
        Table title. Defaults to the model class name followed by
        ``' Regression Results'``.
    gleft, gright : list of (label, value) pairs, optional
        Rows for the left and right half. A value of ``None`` is replaced by
        the default registered for that label. When ``gleft`` is ``None`` a
        default left half is used and ``gright`` is ignored.
    yname, xname : optional
        Forwarded to ``_getnames``.

    Returns
    -------
    SimpleTable
        The left table, extended on the right when there are right rows.

    Raises
    ------
    KeyError
        If a row has value ``None`` and its label has no registered default.

    Notes
    -----
    ``gleft`` and ``gright`` are copied before padding, so the caller's
    lists are not modified.

    TODO: allow gleft, gright to be 1 element tuples instead of filling
    with None?
    '''
    # time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are lambdas because some values raise if they are unavailable.
    # Only one spelling per label is registered to force unique labels.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: ["%#6d" % results.nobs],
        # TODO: check when we have non-integer df
        'Df Model:': lambda: ["%#6d" % results.df_model],
        'Df Residuals:': lambda: ["%#6d" % results.df_resid],
        # doesn't exist for RLM - raises
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
        # no default for 'Method:'
    }

    if title is None:
        # The separating space was missing ("OLSRegression Results").
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # Default general part. Every label must be a key of default_items,
        # and every row needs its separating comma (a missing one made the
        # old default list call a tuple and fail with TypeError).
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]
        try:
            results.llf
        except Exception:  # e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))
        gen_right = []
    else:
        gen_left = list(gleft)
        # A missing right half is treated as empty.
        gen_right = list(gright) if gright else []

    def _fill_defaults(rows):
        # Replace missing (None) values; unknown labels raise KeyError.
        return [(item, default_items[item]() if value is None else value)
                for item, value in rows]

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    gen_header = None
    blank_row = (' ', ' ')

    if gen_right:
        # Pad both halves with blank rows to the same number of rows.
        n_rows = max(len(gen_left), len(gen_right))
        gen_left += [blank_row] * (n_rows - len(gen_left))
        gen_right += [blank_row] * (n_rows - len(gen_right))

        # Padding in SimpleTable doesn't give the wanted layout: force
        # extra spacing and exact string length in the right table.
        gen_right = [('%-21s' % (' ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # extend_right is handed an empty list

    # Built after padding so the left half matches the right half's length.
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Build the general and parameter tables of a fitted model as text.

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable. Defaults to
        ``self.model.endog_names`` and, if that attribute is missing, ``'y'``.
    xname : list of str, optional
        Names of the regressors. Defaults to ``self.model.exog_names`` and,
        if that attribute is missing, ``var_0, var_1, ...`` with one entry
        per element of ``self.params``.
    title : str, optional
        Table title. The default ``0`` selects a title by model class name.
    alpha : float
        Passed to ``self.conf_int`` and used to label the interval column.
    returns : str
        Only ``'print'`` produces a return value (see Returns).
    model_info : optional
        Not used by this function.

    Returns
    -------
    str or None
        For ``returns='print'`` the general table and the parameter table
        joined by a newline; for every other value of ``returns`` -
        including the default ``'text'`` - ``None``.

    Raises
    ------
    KeyError
        If the model class name is not one of the known model types.

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary(returns='print'))
    ...

    Notes
    -----
    NOTE(review): the default ``returns='text'`` falls through and returns
    ``None``; confirm whether that is intended.
    """
    import time

    # TODO: make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weigthed least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model'}
    # Kept for reference only; not used below.
    model_methods = {'OLS': 'Least Squares',
                     'GLS': 'Least Squares',
                     'GLSAR': 'Least Squares',
                     'WLS': 'Least Squares',
                     'RLM': '?',
                     'GLM': '?'}

    if title == 0:
        title = model_types[self.model.__class__.__name__]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    # General part of the summary table, applicable to all(?) models
    # --------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                ('Dependent Variable:', yname),  # What happens with multiple names?
                ('df model', [df_model])]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype],  # was dist family, needs another look
                      time_of_day,
                      [nobs],
                      [df_resid])
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    # Parameters part of the summary table
    # ------------------------------------
    # Note: the per-model lookups are not strictly necessary since the names
    # are standardized; only t versus normal differs.
    tstats = {'OLS': self.t(),
              'GLS': self.t(),
              'GLSAR': self.t(),
              'WLS': self.t(),
              'RLM': self.t(),
              'GLM': self.t()}
    prob_stats = {'OLS': self.pvalues,
                  'GLS': self.pvalues,
                  'GLSAR': self.pvalues,
                  'WLS': self.pvalues,
                  'RLM': self.pvalues,
                  'GLM': self.pvalues}

    # Header names for the parameter table, looked up by model type.
    alp = str((1 - alpha) * 100) + '%'
    param_header = {
        'OLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLSAR': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'WLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        # glm uses t-distribution
        'GLM': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        # TODO: check z
        'RLM': ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval'],
    }

    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    # ``range`` instead of ``xrange`` so this also runs on Python 3.
    exog_len = range(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # SimpleTable should be able to handle the formatting itself.
    # ``zip`` is lazy on Python 3; materialize the rows into a list.
    params_data = list(zip(
        ["%#6.4g" % (params[i]) for i in exog_len],
        ["%#6.4f" % (std_err[i]) for i in exog_len],
        ["%#6.4f" % (tstat[i]) for i in exog_len],
        ["%#6.4f" % (prob_stat[i]) for i in exog_len],
        ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len],
    ))
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    # Output options
    # --------------
    # TODO: the rest needs to be fixed, similar to summary in linear_model

    def ols_printer():
        """Return the summary text for OLS models."""
        return str(general_table) + '\n' + str(parameter_table)

    def glm_printer():
        """Return the summary text for GLM models."""
        return str(general_table) + '\n' + str(parameter_table)

    printers = {'OLS': ols_printer,
                'GLM': glm_printer}

    if returns == 'print':
        try:
            return printers[modeltype]()
        except KeyError:
            # Unknown model types fall back to the OLS layout.
            return printers['OLS']()
def summary(self):
    """
    Build a summary for results that use user-specified parameters.

    The summary contains a two-column header table, then one coefficient
    table for each of the mean, volatility and distribution components
    that reports a non-zero parameter count, then two explanatory notes.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitates export to text, html
        or latex
    """
    model = self.model
    stamp = self._datetime

    # Left half of the header table
    top_left = [('Dep. Variable:', self._dep_name),
                ('Mean Model:', model.name),
                ('Vol Model:', model.volatility.name),
                ('Distribution:', model.distribution.name),
                ('Method:', 'User-specified Parameters'),
                ('', ''),
                ('Date:', stamp.strftime('%a, %b %d %Y')),
                ('Time:', stamp.strftime('%H:%M:%S'))]

    # Right half; the R-squared entries are '--' placeholders and the last
    # two rows are blank so both halves have eight rows
    top_right = [('R-squared:', '--'),
                 ('Adj. R-squared:', '--'),
                 ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                 ('AIC:', '%#10.6g' % self.aic),
                 ('BIC:', '%#10.6g' % self.bic),
                 ('No. Observations:', self._nobs),
                 ('', ''),
                 ('', '')]

    title = model.name + ' - ' + model.volatility.name + ' Model Results'
    left_stubs = [label for label, _ in top_left]
    left_vals = [[value] for _, value in top_left]
    table = SimpleTable(left_vals, txt_fmt=fmt_2cols, title=title,
                        stubs=left_stubs)

    smry = Summary()

    # NOTE: this writes into the fmt_2cols object itself (no copy is made),
    # exactly as the aliased assignment it replaces did
    fmt_2cols['data_fmts'][1] = '%18s'

    right_stubs = ['%-21s' % (' ' + label) for label, _ in top_right]
    right_vals = [[value] for _, value in top_right]
    table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
    smry.tables.append(table)

    # Parameter rows: only a coefficient column is available here
    names = self._names
    header = ['coef']
    columns = (self.params,)
    formats = [(10, 4)]
    rows = []
    for pos in range(len(columns[0])):
        row = []
        for column, spec in zip(columns, formats):
            entry = column[pos]
            if isinstance(entry, np.float64):
                entry = format_float_fixed(entry, *spec)
            row.append(entry)
        rows.append(row)

    counts = (model.num_params,
              model.volatility.num_params,
              model.distribution.num_params)
    section_titles = ('Mean Model', 'Volatility Model', 'Distribution')

    # Consecutive slices of the rows go to each non-empty component
    start = 0
    for section_title, count in zip(section_titles, counts):
        if count == 0:
            continue
        stop = start + count
        section = SimpleTable(rows[start:stop], stubs=names[start:stop],
                              txt_fmt=fmt_params, headers=header,
                              title=section_title)
        smry.tables.append(section)
        start = stop

    extra_text = ('Results generated with user-specified parameters.',
                  'Since the model was not estimated, there are no std. errors.')
    smry.add_extra_txt(extra_text)
    return smry
def summary(self):
    """
    Constructs a summary of the results from a fit model.

    The summary contains a two-column header table, then one parameter
    table (coefficient, standard error, t-statistic, p-value and
    confidence interval) for each of the mean, volatility and distribution
    components that reports a non-zero parameter count.  A note naming the
    covariance estimator is always added; a warning is appended when
    ``convergence_flag`` is truthy.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitates export to text, html
        or latex
    """
    model = self.model
    stamp = self._datetime

    # Left half of the header table
    top_left = [('Dep. Variable:', self._dep_name),
                ('Mean Model:', model.name),
                ('Vol Model:', model.volatility.name),
                ('Distribution:', model.distribution.name),
                ('Method:', 'Maximum Likelihood'),
                ('', ''),
                ('Date:', stamp.strftime('%a, %b %d %Y')),
                ('Time:', stamp.strftime('%H:%M:%S'))]

    # Right half of the header table
    top_right = [('R-squared:', '%#8.3f' % self.rsquared),
                 ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj),
                 ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                 ('AIC:', '%#10.6g' % self.aic),
                 ('BIC:', '%#10.6g' % self.bic),
                 ('No. Observations:', self._nobs),
                 ('Df Residuals:', self.nobs - self.num_params),
                 ('Df Model:', self.num_params)]

    title = model.name + ' - ' + model.volatility.name + ' Model Results'
    left_stubs = [label for label, _ in top_left]
    left_vals = [[value] for _, value in top_left]
    table = SimpleTable(left_vals, txt_fmt=fmt_2cols, title=title,
                        stubs=left_stubs)

    smry = Summary()

    # NOTE: this writes into the fmt_2cols object itself (no copy is made);
    # kept as-is because the table built above was given the same object
    fmt_2cols['data_fmts'][1] = '%18s'

    right_stubs = ['%-21s' % (' ' + label) for label, _ in top_right]
    right_vals = [[value] for _, value in top_right]
    table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
    smry.tables.append(table)

    # Confidence bounds are rendered up front as '[lower,upper]' strings
    conf_int = np.asarray(self.conf_int())
    conf_int_str = ['[' + format_float_fixed(c[0], 7, 3) + ','
                    + format_float_fixed(c[1], 7, 3) + ']'
                    for c in conf_int]

    names = self._names
    header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.']
    columns = (self.params, self.std_err, self.tvalues, self.pvalues,
               conf_int_str)
    # (width, precision)-style arguments handed to format_float_fixed; the
    # last column is already text, so it has no spec
    formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]
    rows = []
    for pos in range(len(columns[0])):
        row = []
        for column, spec in zip(columns, formats):
            entry = column[pos]
            if isinstance(entry, np.float64):
                entry = format_float_fixed(entry, *spec)
            row.append(entry)
        rows.append(row)

    counts = (model.num_params,
              model.volatility.num_params,
              model.distribution.num_params)
    section_titles = ('Mean Model', 'Volatility Model', 'Distribution')

    # Consecutive slices of the rows go to each non-empty component
    start = 0
    for section_title, count in zip(section_titles, counts):
        if count == 0:
            continue
        stop = start + count
        section = SimpleTable(rows[start:stop], stubs=names[start:stop],
                              txt_fmt=fmt_params, headers=header,
                              title=section_title)
        smry.tables.append(section)
        start = stop

    extra_text = ['Covariance estimator: ' + self.cov_type]
    if self.convergence_flag:
        # Spelling of "successful" corrected; line breaks are written as
        # explicit escapes so the layout does not depend on source wrapping
        warning = ('\nWARNING: The optimizer did not indicate successful '
                   'convergence. The message was\n{string_message}. '
                   'See convergence_flag.')
        extra_text.append(
            warning.format(string_message=self._optim_output[-1]))
    smry.add_extra_txt(extra_text)
    return smry
def summary(self):
    """
    Summarize results that were produced from user-specified parameters.

    Produces a header table with general information in two columns,
    followed by a coefficient-only table for every component (mean,
    volatility, distribution) whose parameter count is non-zero, and two
    notes explaining that the model was not estimated.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitates export to text, html
        or latex
    """
    model = self.model
    timestamp = self._datetime

    # Header, left column
    top_left = [
        ("Dep. Variable:", self._dep_name),
        ("Mean Model:", model.name),
        ("Vol Model:", model.volatility.name),
        ("Distribution:", model.distribution.name),
        ("Method:", "User-specified Parameters"),
        ("", ""),
        ("Date:", timestamp.strftime("%a, %b %d %Y")),
        ("Time:", timestamp.strftime("%H:%M:%S")),
    ]
    # Header, right column; R-squared entries are "--" placeholders
    top_right = [
        ("R-squared:", "--"),
        ("Adj. R-squared:", "--"),
        ("Log-Likelihood:", "%#10.6g" % self.loglikelihood),
        ("AIC:", "%#10.6g" % self.aic),
        ("BIC:", "%#10.6g" % self.bic),
        ("No. Observations:", self._nobs),
        ("", ""),
        ("", ""),
    ]

    title = model.name + " - " + model.volatility.name + " Model Results"
    table = SimpleTable(
        [[value] for _, value in top_left],
        txt_fmt=fmt_2cols,
        title=title,
        stubs=[label for label, _ in top_left],
    )

    smry = Summary()

    # NOTE: writes into the fmt_2cols object itself; no copy is made
    fmt_2cols["data_fmts"][1] = "%18s"

    right_half = SimpleTable(
        [[value] for _, value in top_right],
        stubs=["%-21s" % (" " + label) for label, _ in top_right],
    )
    table.extend_right(right_half)
    smry.tables.append(table)

    # One row per parameter, holding only the coefficient
    names = self._names
    header = ["coef"]
    columns = (self.params,)
    formats = [(10, 4)]
    rows = []
    for pos in range(len(columns[0])):
        row = []
        for column, spec in zip(columns, formats):
            entry = column[pos]
            if isinstance(entry, np.float64):
                entry = format_float_fixed(entry, *spec)
            row.append(entry)
        rows.append(row)

    sections = (
        ("Mean Model", model.num_params),
        ("Volatility Model", model.volatility.num_params),
        ("Distribution", model.distribution.num_params),
    )
    # Hand each non-empty component its consecutive slice of the rows
    offset = 0
    for section_title, count in sections:
        if count != 0:
            end = offset + count
            smry.tables.append(
                SimpleTable(
                    rows[offset:end],
                    stubs=names[offset:end],
                    txt_fmt=fmt_params,
                    headers=header,
                    title=section_title,
                )
            )
            offset = end

    extra_text = (
        "Results generated with user-specified parameters.",
        "Since the model was not estimated, there are no std. errors.",
    )
    smry.add_extra_txt(extra_text)
    return smry
def summary(self):
    """
    Summarize the results from a fit model.

    Produces a header table with general information in two columns,
    followed by a parameter table (coefficient, standard error,
    t-statistic, p-value and confidence interval) for every component
    (mean, volatility, distribution) whose parameter count is non-zero,
    and a note naming the covariance estimator.

    Returns
    -------
    summary : Summary instance
        Object that contains tables and facilitates export to text, html
        or latex
    """
    model = self.model
    timestamp = self._datetime

    # Header, left column
    top_left = [
        ("Dep. Variable:", self._dep_name),
        ("Mean Model:", model.name),
        ("Vol Model:", model.volatility.name),
        ("Distribution:", model.distribution.name),
        ("Method:", "Maximum Likelihood"),
        ("", ""),
        ("Date:", timestamp.strftime("%a, %b %d %Y")),
        ("Time:", timestamp.strftime("%H:%M:%S")),
    ]
    # Header, right column
    top_right = [
        ("R-squared:", "%#8.3f" % self.rsquared),
        ("Adj. R-squared:", "%#8.3f" % self.rsquared_adj),
        ("Log-Likelihood:", "%#10.6g" % self.loglikelihood),
        ("AIC:", "%#10.6g" % self.aic),
        ("BIC:", "%#10.6g" % self.bic),
        ("No. Observations:", self._nobs),
        ("Df Residuals:", self.nobs - self.num_params),
        ("Df Model:", self.num_params),
    ]

    title = model.name + " - " + model.volatility.name + " Model Results"
    table = SimpleTable(
        [[value] for _, value in top_left],
        txt_fmt=fmt_2cols,
        title=title,
        stubs=[label for label, _ in top_left],
    )

    smry = Summary()

    # NOTE: writes into the fmt_2cols object itself; no copy is made
    fmt_2cols["data_fmts"][1] = "%18s"

    right_half = SimpleTable(
        [[value] for _, value in top_right],
        stubs=["%-21s" % (" " + label) for label, _ in top_right],
    )
    table.extend_right(right_half)
    smry.tables.append(table)

    # Confidence bounds become "[lower,upper]" strings before tabulation
    conf_int = np.asarray(self.conf_int())
    conf_int_str = [
        "".join(
            (
                "[",
                format_float_fixed(c[0], 7, 3),
                ",",
                format_float_fixed(c[1], 7, 3),
                "]",
            )
        )
        for c in conf_int
    ]

    names = self._names
    header = ["coef", "std err", "t", "P>|t|", "95.0% Conf. Int."]
    columns = (
        self.params,
        self.std_err,
        self.tvalues,
        self.pvalues,
        conf_int_str,
    )
    # Arguments forwarded to format_float_fixed per column; the interval
    # column is already text and therefore has no spec
    formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]
    rows = []
    for pos in range(len(columns[0])):
        row = []
        for column, spec in zip(columns, formats):
            entry = column[pos]
            if isinstance(entry, np.float64):
                entry = format_float_fixed(entry, *spec)
            row.append(entry)
        rows.append(row)

    sections = (
        ("Mean Model", model.num_params),
        ("Volatility Model", model.volatility.num_params),
        ("Distribution", model.distribution.num_params),
    )
    # Hand each non-empty component its consecutive slice of the rows
    offset = 0
    for section_title, count in sections:
        if count != 0:
            end = offset + count
            smry.tables.append(
                SimpleTable(
                    rows[offset:end],
                    stubs=names[offset:end],
                    txt_fmt=fmt_params,
                    headers=header,
                    title=section_title,
                )
            )
            offset = end

    extra_text = ("Covariance estimator: " + self.cov_type,)
    smry.add_extra_txt(extra_text)
    return smry