def summary_find_nfact(self): '''provides a summary for the selection of the number of factors Returns ------- sumstr : string summary of the results for selecting the number of factors ''' if not hasattr(self, 'results_find_nfact'): self.fit_find_nfact() results = self.results_find_nfact sumstr = '' sumstr += '\n' + 'Best result for k, by AIC, BIC, R2_adj, L1O' # best = np.r_[(np.argmin(results[:,1:3],0), np.argmax(results[:,3],0), # np.argmin(results[:,-1],0))] sumstr += '\n' + ' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact) from statsmodels.iolib.table import SimpleTable headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ') numformat = ['%6d'] + ['%10.3f'] * 4 #'%10.4f' txt_fmt1 = dict(data_fmts=numformat) tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1) sumstr += '\n' + "PCA regression on simulated data," sumstr += '\n' + "DGP: 2 factors and 4 explanatory variables" sumstr += '\n' + tabl.__str__() sumstr += '\n' + "Notes: k is number of components of PCA," sumstr += '\n' + " constant is added additionally" sumstr += '\n' + " k=0 means regression on constant only" sumstr += '\n' + " L1O: sum of squared prediction errors for leave-one-out" return sumstr
def test_simple_table_3(self): # Test SimpleTable.extend() as in extend down desired = ''' ============================== header s1 header d1 ------------------------------ stub R1 C1 10.30312 10.73999 stub R2 C1 90.30312 90.73999 header s2 header d2 ------------------------------ stub R1 C2 50.95038 50.65765 stub R2 C2 40.95038 40.65765 ------------------------------ ''' data1 = [[10.30312, 10.73999], [90.30312, 90.73999]] data2 = [[50.95038, 50.65765], [40.95038, 40.65765]] stubs1 = ['stub R1 C1', 'stub R2 C1'] stubs2 = ['stub R1 C2', 'stub R2 C2'] header1 = ['header s1', 'header d1'] header2 = ['header s2', 'header d2'] actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt) actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt) actual1.extend(actual2) actual = '\n%s\n' % actual1.as_text() assert_equal(desired, str(actual))
def _df_to_simpletable( df, align="r", float_format="%.4f", header=True, index=True, table_dec_above="-", table_dec_below=None, header_dec_below="-", pad_col=0, pad_index=0, ): dat = df.copy() dat = dat.applymap(lambda x: _formatter(x, float_format)) if header: headers = [str(x) for x in dat.columns.tolist()] else: headers = None if index: stubs = [str(x) + int(pad_index) * " " for x in dat.index.tolist()] else: dat.ix[:, 0] = [str(x) + int(pad_index) * " " for x in dat.ix[:, 0]] stubs = None st = SimpleTable( np.array(dat), headers=headers, stubs=stubs, ltx_fmt=fmt_latex, txt_fmt=fmt_txt ) st.output_formats["latex"]["data_aligns"] = align st.output_formats["txt"]["data_aligns"] = align st.output_formats["txt"]["table_dec_above"] = table_dec_above st.output_formats["txt"]["table_dec_below"] = table_dec_below st.output_formats["txt"]["header_dec_below"] = header_dec_below st.output_formats["txt"]["colsep"] = " " * int(pad_col + 1) return st
def _df_to_simpletable(df, align='r', float_format="%.4f", header=True, index=True, table_dec_above='-', table_dec_below=None, header_dec_below='-', pad_col=0, pad_index=0): dat = df.copy() dat = dat.applymap(lambda x: _formatter(x, float_format)) if header: headers = [str(x) for x in dat.columns.tolist()] else: headers = None if index: stubs = [str(x) + int(pad_index) * ' ' for x in dat.index.tolist()] else: dat.iloc[:, 0] = [str(x) + int(pad_index) * ' ' for x in dat.iloc[:, 0]] stubs = None st = SimpleTable(np.array(dat), headers=headers, stubs=stubs, ltx_fmt=fmt_latex, txt_fmt=fmt_txt) st.output_formats['latex']['data_aligns'] = align st.output_formats['txt']['data_aligns'] = align st.output_formats['txt']['table_dec_above'] = table_dec_above st.output_formats['txt']['table_dec_below'] = table_dec_below st.output_formats['txt']['header_dec_below'] = header_dec_below st.output_formats['txt']['colsep'] = ' ' * int(pad_col + 1) return st
def acorr_estimates(self): """ Printing the AR coefficient terms to match sas output """ # autoCovDf = pd.DataFrame(np.round(self.estimated_acov,8), # columns=['Covariance'], index=range(1,len(self.estimated_acov)+1)) # autoCorrDf = pd.DataFrame(np.round(self.estimated_acorr,8), # columns=['Correlation'], index=range(1,len(self.estimated_acorr)+1)) # out = pd.concat([autoCovDf, autoCorrDf], axis=1, join='outer'); out.index.name = 'Lag' pic = [] for i in self.estimated_acorr: if (i == 1): pic.append(" " * 20 + "|" + "*" * 20) elif (i < 0): temp = int(np.abs(np.round(i * 20, 0))) pic.append(' ' * (20 - temp) + '*' * temp + "|" + " " * 20) #print( ' '*(20 - temp) + '*' * temp) else: temp = int(np.abs(np.round(i * 20, 0))) pic.append(' ' * 20 + "|" + '*' * temp + " " * (20 - temp)) data = list( zip(np.arange(0, self.ar + 1), self.estimated_acov, self.estimated_acorr, pic)) tbl = SimpleTable(data, [ "Lag", "Autocovariance", "Autocorrelation", "-1" + " " * 18 + "0" + " " * 19 + "1" ], title="Estimates of Autocorrelations") return tbl
def test_customlabel(self): # Limited test of custom custom labeling tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1) tbl[1][1].data = np.nan tbl.label_cells(custom_labeller) #print([[c.datatype for c in row] for row in tbl]) desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * -- * 1 * * stub2 * 2.00 * 3 * ***************************** """ actual = '\n%s\n' % tbl.as_text(missing='--') self.assertEqual(actual, desired)
def sm_summary_restrict(summary, dropFE=False, alpha=None): ''' Drops fixed effects or nonsignificant predictors. Can do both simultaneously input: * summary - statsmodels summary() object (i.e. smf.ols(formula).fit().summary() ) * dropFE - True/False, default is False - drop fixed effects (any predictor with 'C(' in name * alpha - Float, alpha value to drop parameters at (i.e. if predictor is 0.08 and alpha=0.05, predictor will not be included) output: * summary - statsmodels summary() object ''' from statsmodels.iolib.table import SimpleTable tblheader = summary.tables[1].data[0] tbldata = [] for r in summary.tables[1].data[1:]: if dropFE == True: if 'C(' in r[0]: continue else: pass if alpha != None: if float(r[-2]) < alpha: tbldata.append(r) else: tbldata.append(r) #Add back in our restricted table summary.tables[1] = SimpleTable(tbldata, tblheader) return summary
def summary(self) -> SimpleTable: """ Summary table of the descriptive statistics Returns ------- SimpleTable A table instance supporting export to text, csv and LaTeX """ df = self.frame.astype(object) df = df.fillna("") cols = [str(col) for col in df.columns] stubs = [str(idx) for idx in df.index] data = [] for _, row in df.iterrows(): data.append([v for v in row]) def _formatter(v): if isinstance(v, str): return v elif v // 1 == v: return str(int(v)) return f"{v:0.4g}" return SimpleTable( data, header=cols, stubs=stubs, title="Descriptive Statistics", txt_fmt={"data_fmts": {0: "%s", 1: _formatter}}, datatypes=[1] * len(data), )
def _param_table( params: NDArray, se: NDArray, tstats: NDArray, pvalues: NDArray, stubs: Sequence[str], title: str, ) -> SimpleTable: ci = params[:, None] + se[:, None] * stats.norm.ppf([[0.025, 0.975]]) param_data = np.column_stack([params, se, tstats, pvalues, ci]) data = [] for row in param_data: txt_row = [] for i, v in enumerate(row): f = str_format if i == 3: f = pval_format txt_row.append(f(v)) data.append(txt_row) header = [ "Parameter", "Std. Err.", "T-stat", "P-value", "Lower CI", "Upper CI" ] table = SimpleTable(data, stubs=stubs, txt_fmt=fmt_params, headers=header, title=title) return table
def test_customlabel(self): # Limited test of custom custom labeling tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1) tbl[1][1].data = np.nan tbl.label_cells(custom_labeller) #print([[c.datatype for c in row] for row in tbl]) desired = """ ***************************** * * header1 * header2 * ***************************** * stub1 * -- * 1 * * stub2 * 2.00 * 3 * ***************************** """ actual = '\n%s\n' % tbl.as_text(missing='--') assert_equal(actual, desired)
def display(self): now = datetime.now() pred_hashes = [self._build_xarray_hash(pred) for pred in self.preds] data = [ ("Date:", [now.strftime(self.date_format)]), ("Time:", [now.strftime(self.time_format)]), ("No. Timepoints:", [self.target.sizes["dt"]]), ("Horizon", [self.target.sizes["h"]]), ] if self.ref_name: data += [(f"Reference Metric", [self.ref_name])] data += [("Target Hash", [self._build_xarray_hash(self.target)])] for name, pred_hash in zip(self.names, pred_hashes): # done explicitly to make it clear data += [(f'"{name}" Hash', [pred_hash])] if self.ref is not None: data += [(f"Reference Metric Hash", [self._build_xarray_hash(self.ref)])] return SimpleTable( data=list(map(itemgetter(1), data)), stubs=list(map(itemgetter(0), data)), title="Dataset Description", ).as_html()
def summary(self): """Generates a summary table with basic statistics for each submodel. TODO: immprove interface with linearmodel, so that this doesn't rely on private methods. """ summary = Summary() headers = ['Model form', 'Observations', 'Adjusted r^2', 'P value'] table_data = [] # for each model for model in self._models: row = [] # populate row with model statistics res = model._model._model.fit() row.append(model._model.get_model_formula()) row.append(round(res.nobs)) row.append(round(res.rsquared_adj, 2)) row.append(format(res.f_pvalue, '.1E')) # append the row to the data table_data.append(row) # create table with data and headers:w table = SimpleTable(data=table_data, headers=headers) # add table to summary summary.tables.append(table) return summary
def test_SimpleTable_special_chars(self): # Simple table with characters: (%, >, |, _, $, &, #) cell0c_data = 22 cell1c_data = 1053 row0c_data = [cell0c_data, cell1c_data] row1c_data = [23, 6250.4] table1c_data = [row0c_data, row1c_data] test1c_stubs = ('>stub1%', 'stub_2') test1c_header = ('#header1$', 'header&|') tbl_c = SimpleTable(table1c_data, test1c_header, test1c_stubs, ltx_fmt=ltx_fmt1) def test_ltx_special_chars(self): # Test for special characters (latex) in headers and stubs desired = r""" \begin{tabular}{lcc} \toprule & \textbf{\#header1\$} & \textbf{header\&$|$} \\ \midrule \textbf{$>$stub1\%} & 22 & 1053 \\ \textbf{stub\_2} & 23 & 6250.4 \\ \bottomrule \end{tabular} """ actual = '\n%s\n' % tbl_c.as_latex_tabular(center=False) self.assertEqual(actual, desired) test_ltx_special_chars(self)
def feat_imp(cols, fi): ''' Function for amazing showing of feature importances Input: 1) cols - list of feature names 2) fi - np.array of feature importances Output: 1) Table with features and their importances; 2) Vizualization over barplot. ''' import numpy as np from statsmodels.iolib.table import SimpleTable fi = np.round(fi, 3) indices = np.argsort(fi)[::-1] cols = [cols[i] for i in indices] print(SimpleTable(np.append([cols], [fi], axis=0).T, ['Feature','Importance'])) all_colors = list(plt.cm.colors.cnames.keys()) c = np.random.choice(all_colors, fi.shape[0], replace=False) plt.figure() plt.title('Feature importances') plt.bar(range(fi.shape[0]), fi[indices], color=c, width=.5) plt.xticks(range(fi.shape[0]), cols, rotation=45) plt.show();
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None, title=None): '''create summary table of regression parameters with several equations This allows interleaving of parameters with bse and/or tvalues Parameters ---------- result : result instance the result instance with params and attributes in extras extras : list[str] additional attributes to add below a parameter row, e.g. bse or tvalues endog_names : {list[str], None} names for rows of the parameter array (multivariate endog) exog_names : {list[str], None} names for columns of the parameter array (exog) alpha : float level for confidence intervals, default 0.95 title : None or string Returns ------- tables : list of SimpleTable this contains a list of all seperate Subtables table_all : SimpleTable the merged table with results concatenated for each row of the parameter array ''' if endog_names is None: # TODO: note the [1:] is specific to current MNLogit endog_names = ['endog_%d' % i for i in np.unique(result.model.endog)[1:]] if exog_names is None: exog_names = ['var%d' % i for i in range(len(result.params))] # TODO: check formatting options with different values res_params = [[forg(item, prec=4) for item in row] for row in result.params] if extras: extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')') for v in col] for col in getattr(result, what)] for what in extras ] data = lzip(res_params, *extras_list) data = [i for j in data for i in j] #flatten stubs = lzip(endog_names, *[['']*len(endog_names)]*len(extras)) stubs = [i for j in stubs for i in j] #flatten else: data = res_params stubs = endog_names txt_fmt = copy.deepcopy(fmt_params) txt_fmt["data_fmts"] = ["%s"]*result.params.shape[1] return SimpleTable(data, headers=exog_names, stubs=stubs, title=title, txt_fmt=txt_fmt)
def summary_table(self, float_fmt="%6.3f"): '''create a summary table with all influence and outlier measures This does currently not distinguish between statistics that can be calculated from the original regression results and for which a leave-one-observation-out loop is needed Returns ------- res : SimpleTable instance SimpleTable instance with the results, can be printed Notes ----- This also attaches table_data to the instance. ''' #print self.dfbetas # table_raw = [ np.arange(self.nobs), # self.endog, # self.fittedvalues, # self.cooks_distance(), # self.resid_studentized_internal, # self.hat_matrix_diag, # self.dffits_internal, # self.resid_studentized_external, # self.dffits, # self.dfbetas # ] table_raw = [ ('obs', np.arange(self.nobs)), ('endog', self.endog), ('fitted\nvalue', self.results.fittedvalues), ("Cook's\nd", self.cooks_distance[0]), ("student.\nresidual", self.resid_studentized_internal), ('hat diag', self.hat_matrix_diag), ('dffits \ninternal', self.dffits_internal[0]), ("ext.stud.\nresidual", self.resid_studentized_external), ('dffits', self.dffits[0]), ('dfbeta\nslope', self.dfbetas[:, 1] ) #skip needs to partially unravel ] colnames, data = zip(*table_raw) #unzip data = np.column_stack(data) self.table_data = data from statsmodels.iolib.table import SimpleTable, default_html_fmt from statsmodels.iolib.tableformatting import fmt_base from copy import deepcopy fmt = deepcopy(fmt_base) fmt_html = deepcopy(default_html_fmt) fmt['data_fmts'] = ["%4d"] + [float_fmt] * (data.shape[1] - 1) #fmt_html['data_fmts'] = fmt['data_fmts'] return SimpleTable(data, headers=colnames, txt_fmt=fmt, html_fmt=fmt_html)
def test_SimpleTable_1(self): # Basic test, test_SimpleTable_1 desired = ''' ===================== header1 header2 --------------------- stub1 1.30312 2.73999 stub2 1.95038 2.65765 --------------------- ''' test1data = [[1.30312, 2.73999],[1.95038, 2.65765]] test1stubs = ('stub1', 'stub2') test1header = ('header1', 'header2') actual = SimpleTable(test1data, test1header, test1stubs, txt_fmt=default_txt_fmt) actual = '\n%s\n' % actual.as_text() self.assertEqual(desired, str(actual))
def test_simple_table_1(self): # Basic test, test_simple_table_1 desired = ''' ===================== header1 header2 --------------------- stub1 1.30312 2.73999 stub2 1.95038 2.65765 --------------------- ''' test1data = [[1.30312, 2.73999],[1.95038, 2.65765]] test1stubs = ('stub1', 'stub2') test1header = ('header1', 'header2') actual = SimpleTable(test1data, test1header, test1stubs, txt_fmt=default_txt_fmt) actual = '\n%s\n' % actual.as_text() assert_equal(desired, str(actual))
def test_default_alignment(self): desired = ''' ===================== header1 header2 --------------------- stub1 1.30312 2.73 stub2 1.95038 2.6 --------------------- ''' test1data = [[1.30312, 2.73], [1.95038, 2.6]] test1stubs = ('stub1', 'stub2') test1header = ('header1', 'header2') actual = SimpleTable(test1data, test1header, test1stubs, txt_fmt=default_txt_fmt) actual = '\n%s\n' % actual.as_text() assert_equal(desired, str(actual))
def summary_quantiles(self, idx, distppf, frac=[0.01, 0.025, 0.05, 0.1, 0.975], varnames=None, title=None): '''summary table for quantiles (critical values) Parameters ---------- idx : None or list of integers List of indices into the Monte Carlo results (columns) that should be used in the calculation distppf : callable probability density function of reference distribution TODO: use `crit` values instead or additional, see summary_cdf frac : array_like, float probabilities for which varnames : None, or list of strings optional list of variable names, same length as idx Returns ------- table : instance of SimpleTable use `print(table` to see results ''' idx = np.atleast_1d(idx) #assure iterable, use list ? quant, mcq = self.quantiles(idx, frac=frac) #not sure whether this will work with single quantile #crit = stats.chi2([2,4]).ppf(np.atleast_2d(quant).T) crit = distppf(np.atleast_2d(quant).T) mml = [] for i, ix in enumerate(idx): #TODO: hardcoded 2 ? mml.extend([mcq[:, i], crit[:, i]]) #mmlar = np.column_stack(mml) mmlar = np.column_stack([quant] + mml) #print(mmlar.shape if title: title = title + ' Quantiles (critical values)' else: title = 'Quantiles (critical values)' #TODO use stub instead if varnames is None: varnames = ['var%d' % i for i in range(mmlar.shape[1] // 2)] headers = ['\nprob'] + [ '%s\n%s' % (i, t) for i in varnames for t in ['mc', 'dist'] ] return SimpleTable(mmlar, txt_fmt={ 'data_fmts': ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1) }, title=title, headers=headers)
def summary_news(self, sparsify=True): """ Create summary table showing news from new data since previous results Parameters ---------- sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. Returns ------- updates_table : SimpleTable Table showing new datapoints that were not in the previous results' data. Columns are: - `update date` : date associated with a new data point. - `updated variable` : variable for which new data was added at `update date`. - `forecast (prev)` : the forecast value for the updated variable at the update date in the previous results object (i.e. prior to the data being available). - `observed` : the observed value of the new datapoint. See Also -------- data_updates """ data = pd.merge(self.data_updates, self.news, left_index=True, right_index=True).sort_index().reset_index() data[['update date', 'updated variable']] = (data[['update date', 'updated variable']].applymap(str)) data.iloc[:, 2:] = data.iloc[:, 2:].applymap( lambda num: '' if pd.isnull(num) else '%.2f' % num) # Sparsify the date column if sparsify: mask = data['update date'] == data['update date'].shift(1) data.loc[mask, 'update date'] = '' params_data = data.values params_header = data.columns.tolist() params_stubs = None title = 'News from updated observations:' updates_table = SimpleTable(params_data, params_header, params_stubs, txt_fmt=fmt_params, title=title) return updates_table
def test_SimpleTable_3(self): # Test SimpleTable.extend() as in extend down desired = ''' ============================== header s1 header d1 ------------------------------ stub R1 C1 10.30312 10.73999 stub R2 C1 90.30312 90.73999 header s2 header d2 ------------------------------ stub R1 C2 50.95038 50.65765 stub R2 C2 40.95038 40.65765 ------------------------------ ''' data1 = [[10.30312, 10.73999], [90.30312, 90.73999]] data2 = [[50.95038, 50.65765], [40.95038, 40.65765]] stubs1 = ['stub R1 C1', 'stub R2 C1'] stubs2 = ['stub R1 C2', 'stub R2 C2'] header1 = ['header s1', 'header d1'] header2 = ['header s2', 'header d2'] actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt) actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt) actual1.extend(actual2) actual = '\n%s\n' % actual1.as_text() self.assertEqual(desired, str(actual))
def summary_cdf(self, idx, frac, crit, varnames=None, title=None): '''summary table for cumulative density function Parameters ---------- idx : None or list of integers List of indices into the Monte Carlo results (columns) that should be used in the calculation frac : array_like, float probabilities for which crit : array_like values for which cdf is calculated varnames : None, or list of strings optional list of variable names, same length as idx Returns ------- table : instance of SimpleTable use `print(table` to see results ''' idx = np.atleast_1d(idx) #assure iterable, use list ? mml = [] #TODO:need broadcasting in cdf for i in range(len(idx)): #print(i, mc1.cdf(crit[:,i], [idx[i]])[1].ravel() mml.append(self.cdf(crit[:, i], [idx[i]])[1].ravel()) #mml = self.cdf(crit, idx)[1] #mmlar = np.column_stack(mml) #print(mml[0].shape, np.shape(frac) mmlar = np.column_stack([frac] + mml) #print(mmlar.shape if title: title = title + ' Probabilites' else: title = 'Probabilities' #TODO use stub instead #headers = ['\nprob'] + ['var%d\n%s' % (i, t) for i in range(mmlar.shape[1]-1) for t in ['mc']] if varnames is None: varnames = ['var%d' % i for i in range(mmlar.shape[1] - 1)] headers = ['prob'] + varnames return SimpleTable(mmlar, txt_fmt={ 'data_fmts': ["%#6.3f"] + ["%#10.4f"] * (np.array(mml).shape[1] - 1) }, title=title, headers=headers)
def generate_table(left_col, right_col, table_title): # Do not use column headers col_headers = None # Generate the right table if right_col: # Add padding if len(right_col) < len(left_col): right_col += [(' ', ' ')] * (len(left_col) - len(right_col)) elif len(right_col) > len(left_col): left_col += [(' ', ' ')] * (len(right_col) - len(left_col)) right_col = [('%-21s' % (' '+k), v) for k,v in right_col] # Generate the right table gen_stubs_right, gen_data_right = zip_longest(*right_col) gen_table_right = SimpleTable(gen_data_right, col_headers, gen_stubs_right, title = table_title, txt_fmt = fmt_2cols) else: # If there is no right table set the right table to empty gen_table_right = [] # Generate the left table gen_stubs_left, gen_data_left = zip_longest(*left_col) gen_table_left = SimpleTable(gen_data_left, col_headers, gen_stubs_left, title = table_title, txt_fmt = fmt_2cols) # Merge the left and right tables to make a single table gen_table_left.extend_right(gen_table_right) general_table = gen_table_left return general_table
def test__repr_latex(self): desired = r""" \begin{center} \begin{tabular}{lcc} \toprule & \textbf{header1} & \textbf{header2} \\ \midrule \textbf{stub1} & 5.394 & 29.3 \\ \textbf{stub2} & 343 & 34.2 \\ \bottomrule \end{tabular} \end{center} """ testdata = [[5.394, 29.3], [343, 34.2]] teststubs = ('stub1', 'stub2') testheader = ('header1', 'header2') tbl = SimpleTable(testdata, testheader, teststubs, txt_fmt=default_txt_fmt) actual = '\n%s\n' % tbl._repr_latex_() assert_equal(actual, desired)
def summary_proc(self, g): """ For internal use """ if self.exog != None: myTitle = ('exog = ' + str(self.groups[g]) + '\n') else: myTitle = "Kaplan-Meier Curve" table = np.transpose(self.results[g]) table = np.c_[np.transpose(self.ts[g]),table] table = SimpleTable(table, headers=['Time','Survival','Std. Err'], title = myTitle) print(table)
def summary_find_nfact(self): '''provides a summary for the selection of the number of factors Returns ------- sumstr : string summary of the results for selecting the number of factors ''' if not hasattr(self, 'results_find_nfact'): self.fit_find_nfact() results = self.results_find_nfact sumstr = '' sumstr += '\n' + 'Best result for k, by AIC, BIC, R2_adj, L1O' # best = np.r_[(np.argmin(results[:,1:3],0), np.argmax(results[:,3],0), # np.argmin(results[:,-1],0))] sumstr += '\n' + ' '*19 + '%5d %4d %6d %5d' % tuple(self.best_nfact) from statsmodels.iolib.table import (SimpleTable, default_txt_fmt, default_latex_fmt, default_html_fmt) headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ') numformat = ['%6d'] + ['%10.3f']*4 #'%10.4f' txt_fmt1 = dict(data_fmts = numformat) tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1) sumstr += '\n' + "PCA regression on simulated data," sumstr += '\n' + "DGP: 2 factors and 4 explanatory variables" sumstr += '\n' + tabl.__str__() sumstr += '\n' + "Notes: k is number of components of PCA," sumstr += '\n' + " constant is added additionally" sumstr += '\n' + " k=0 means regression on constant only" sumstr += '\n' + " L1O: sum of squared prediction errors for leave-one-out" return sumstr
def summary(self, xname=None, title='Summarize the Loistic Regression Results', alpha=.05): """Summarize the Regression Results """ exog_idx = lrange(len(self.params)) params = self.params std_err = self.bse tvalues = self.tvalues pvalues = self.pvalues conf_int = self.conf_int try: vif = self.vif except: vif = np.ones(len(self.params)) if self.use_t: param_header = [ 'coef', 'std err', 't', 'P>|t|', '[' + str(alpha / 2), str(1 - alpha / 2) + ']', 'vif' ] else: param_header = [ 'coef', 'std err', 'z', 'P>|z|', '[' + str(alpha / 2), str(1 - alpha / 2) + ']', 'vif' ] if xname is None: xname = ['x_%d' % i for i in range(len(self.params))] xname[0] = 'const' else: xname = xname if len(xname) != len(params): raise ValueError('xnames and params do not have the same length') params_stubs = xname params_data = lzip([self.forg(params[i], 4) for i in exog_idx], [self.forg(std_err[i]) for i in exog_idx], [self.forg(tvalues[i]) for i in exog_idx], [self.forg(pvalues[i]) for i in exog_idx], [self.forg(conf_int[i, 0]) for i in exog_idx], [self.forg(conf_int[i, 1]) for i in exog_idx], [self.forg(vif[i]) for i in exog_idx]) parameter_table = SimpleTable(params_data, param_header, params_stubs, title=title) return parameter_table
def _top_table( top_left: Sequence[Tuple[str, str]], top_right: Sequence[Tuple[str, str]], title: str, ) -> SimpleTable: stubs = [] vals = [] for stub, val in top_left: stubs.append(stub) vals.append([val]) table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs) fmt = fmt_2cols.copy() fmt["data_fmts"][1] = "%18s" top_right = [("%-21s" % (" " + k), v) for k, v in top_right] stubs = [] vals = [] for stub, val in top_right: stubs.append(stub) vals.append([val]) table.extend_right(SimpleTable(vals, stubs=stubs)) return table
def quality_metrics(y, y_pred): ''' Quality metrics for binary classification in pretty table ''' from statsmodels.iolib.table import SimpleTable from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix mv = [] mv.append(['Accuracy', round(accuracy_score(y, y_pred), 2)]) mv.append(['Precision', round(precision_score(y, y_pred), 2)]) mv.append(['Recall', round(recall_score(y, y_pred), 2)]) mv.append(['F1', round(f1_score(y, y_pred), 2)]) # Metrics print(SimpleTable(mv, ['Metric', 'Value'])) # Confusion matrix cm = confusion_matrix(y, y_pred) cmp = cm*100/cm.sum() cmp = np.round_(cmp, 2) print(SimpleTable(np.append([['Negative_Model','Positive_Model']], cm, axis=0).T, ['Amount','Negative_Real','Positive_Real'])) print(SimpleTable(np.append([['Negative_Model','Positive_Model']], cmp, axis=0).T, ['Percent','Negative_Real','Positive_Real']))
def summary(self): title = self.title + ". " + self.h0 + ". " \ + self.conclusion_str + self.signif_str + "." data_fmt = {"data_fmts": ["%#0.4g", "%#0.4g", "%#0.3F", "%s"]} html_data_fmt = dict(data_fmt) html_data_fmt["data_fmts"] = ["<td>" + i + "</td>" for i in html_data_fmt["data_fmts"]] return SimpleTable(data=[[self.test_statistic, self.crit_value, self.pvalue, str(self.df)]], headers=['Test statistic', 'Critical value', 'p-value', 'df'], title=title, txt_fmt=data_fmt, html_fmt=html_data_fmt, ltx_fmt=data_fmt)
def summary(self) -> Summary: """Summary of test, containing statistic, p-value and critical values""" if self.bandwidth == int(self.bandwidth): bw = str(int(self.bandwidth)) else: bw = f"{self.bandwidth:0.3f}" table_data = [ ("Test Statistic", f"{self.stat:0.3f}"), ("P-value", f"{self.pvalue:0.3f}"), ("Kernel", f"{self.kernel}"), ("Bandwidth", bw), ] title = self.name table = SimpleTable( table_data, stubs=None, title=title, colwidths=18, datatypes=[0, 1], data_aligns=("l", "r"), ) smry = Summary() smry.tables.append(table) cv_string = "Critical Values: " for val in self.critical_values.keys(): p = str(int(val)) + "%" cv_string += f"{self.critical_values[val]:0.2f}" cv_string += " (" + p + ")" cv_string += ", " # Remove trailing ,<space> cv_string = cv_string[:-2] extra_text = [ "Trend: " + TREND_DESCRIPTION[self._trend], cv_string, "Null Hypothesis: " + self.null_hypothesis, "Alternative Hypothesis: " + self.alternative_hypothesis, "Distribution Order: " + str(self.distribution_order), ] smry.add_extra_txt(extra_text) return smry
def print_summary(self, stats, orientation='auto'): #TODO: need to specify a table formating for the numbers, using defualt title = 'Summary Statistics' header = stats stubs = self.univariate['obs'][1] data = [[self.univariate[astat][2][col] for astat in stats] for col in range(len(self.univariate['obs'][2]))] if (orientation == 'varcols') or \ (orientation == 'auto' and len(stubs) < len(header)): #swap rows and columns data = lmap(lambda *row: list(row), *data) header, stubs = stubs, header part_fmt = dict(data_fmts=["%#8.4g"] * (len(header) - 1)) table = SimpleTable(data, header, stubs, title=title, txt_fmt=part_fmt) return table
def ar_params(self): """ Printing the AR coefficient terms. """ # ywcDF = pd.DataFrame(np.round(self.yw_coef,4), # columns=['Coefficient'], index=range(1,len(self.yw_coef)+1)) # ywsDF = pd.DataFrame(np.round(self.yw_std,4), # columns=['Std Err'], index=range(1,len(self.yw_coef)+1)) # ywtvDF = pd.DataFrame(np.round(ywcDF.values/ywsDF.values,4) # , columns=['t Value'], index=range(1,len(self.yw_coef)+1)) # ywAll = pd.concat([ywcDF, ywsDF, ywtvDF], axis=1, join='outer'); ywAll.index.name = 'Lag' data = list( zip(np.arange(1, self.ar + 1), self.yw_coef, self.yw_std, self.yw_coef / self.yw_std)) tbl = SimpleTable(data, ["Lag", "Coefficient", "Standard Error", "t Value"], title="Estimates of Autoregressive Parameters") return tbl
def summary(self): """ Constructs a summary of the results from a fit model. Returns ------- summary : Summary instance Object that contains tables and facilitated export to text, html or latex """ # Summary layout # 1. Overall information # 2. Mean parameters # 3. Volatility parameters # 4. Distribution parameters # 5. Notes model = self.model model_name = model.name + " - " + model.volatility.name # Summary Header top_left = [ ("Dep. Variable:", self._dep_name), ("Mean Model:", model.name), ("Vol Model:", model.volatility.name), ("Distribution:", model.distribution.name), ("Method:", "Maximum Likelihood"), ("", ""), ("Date:", self._datetime.strftime("%a, %b %d %Y")), ("Time:", self._datetime.strftime("%H:%M:%S")), ] top_right = [ ("R-squared:", "%#8.3f" % self.rsquared), ("Adj. R-squared:", "%#8.3f" % self.rsquared_adj), ("Log-Likelihood:", "%#10.6g" % self.loglikelihood), ("AIC:", "%#10.6g" % self.aic), ("BIC:", "%#10.6g" % self.bic), ("No. Observations:", self._nobs), ("Df Residuals:", self.nobs - self.num_params), ("Df Model:", self.num_params), ] title = model_name + " Model Results" stubs = [] vals = [] for stub, val in top_left: stubs.append(stub) vals.append([val]) table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs) # create summary table instance smry = Summary() # Top Table # Parameter table fmt = fmt_2cols fmt["data_fmts"][1] = "%18s" top_right = [("%-21s" % (" " + k), v) for k, v in top_right] stubs = [] vals = [] for stub, val in top_right: stubs.append(stub) vals.append([val]) table.extend_right(SimpleTable(vals, stubs=stubs)) smry.tables.append(table) conf_int = np.asarray(self.conf_int()) conf_int_str = [] for c in conf_int: conf_int_str.append("[" + format_float_fixed(c[0], 7, 3) + "," + format_float_fixed(c[1], 7, 3) + "]") stubs = self._names header = ["coef", "std err", "t", "P>|t|", "95.0% Conf. Int."] vals = (self.params, self.std_err, self.tvalues, self.pvalues, conf_int_str) formats = [(10, 4), (9, 3), (9, 3), (9, 3), None] pos = 0 param_table_data = [] for _ in range(len(vals[0])): row = [] for i, val in enumerate(vals): if isinstance(val[pos], np.float64): converted = format_float_fixed(val[pos], *formats[i]) else: converted = val[pos] row.append(converted) pos += 1 param_table_data.append(row) mc = self.model.num_params vc = self.model.volatility.num_params dc = self.model.distribution.num_params counts = (mc, vc, dc) titles = ("Mean Model", "Volatility Model", "Distribution") total = 0 for title, count in zip(titles, counts): if count == 0: continue table_data = param_table_data[total : total + count] table_stubs = stubs[total : total + count] total += count table = SimpleTable(table_data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title) smry.tables.append(table) extra_text = ("Covariance estimator: " + self.cov_type,) smry.add_extra_txt(extra_text) return smry
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None): '''generate top table(s) TODO: this still uses predefined model_methods ? allow gleft, gright to be 1 element tuples instead of filling with None? ''' #change of names ? gen_left, gen_right = gleft, gright #time and names are always included import time time_now = time.localtime() time_of_day = [time.strftime("%H:%M:%S", time_now)] date = time.strftime("%a, %d %b %Y", time_now) yname, xname = _getnames(results, yname=yname, xname=xname) #create dictionary with default #use lambdas because some values raise exception if they are not available #alternate spellings are commented out to force unique labels default_items = dict([ ('Dependent Variable:', lambda: [yname]), ('Dep. Variable:', lambda: [yname]), ('Model:', lambda: [results.model.__class__.__name__]), #('Model type:', lambda: [results.model.__class__.__name__]), ('Date:', lambda: [date]), ('Time:', lambda: time_of_day), ('Number of Obs:', lambda: [results.nobs]), #('No. of Observations:', lambda: ["%#6d" % results.nobs]), ('No. Observations:', lambda: ["%#6d" % results.nobs]), #('Df model:', lambda: [results.df_model]), ('Df Model:', lambda: ["%#6d" % results.df_model]), #TODO: check when we have non-integer df ('Df Residuals:', lambda: ["%#6d" % results.df_resid]), #('Df resid:', lambda: [results.df_resid]), #('df resid:', lambda: [results.df_resid]), #check capitalization ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]) #doesn't exist for RLM - exception #('Method:', lambda: [???]), #no default for this ]) if title is None: title = results.model.__class__.__name__ + 'Regression Results' if gen_left is None: #default: General part of the summary table, Applicable to all? models gen_left = [('Dep. Variable:', None), ('Model type:', None), ('Date:', None), ('No. Observations:', None) ('Df model:', None), ('Df resid:', None)] try: llf = results.llf gen_left.append(('Log-Likelihood', None)) except: #AttributeError, NotImplementedError pass gen_right = [] gen_title = title gen_header = None #needed_values = [k for k,v in gleft + gright if v is None] #not used anymore #replace missing (None) values with default values gen_left_ = [] for item, value in gen_left: if value is None: value = default_items[item]() #let KeyErrors raise exception gen_left_.append((item, value)) gen_left = gen_left_ if gen_right: gen_right_ = [] for item, value in gen_right: if value is None: value = default_items[item]() #let KeyErrors raise exception gen_right_.append((item, value)) gen_right = gen_right_ #check missing_values = [k for k,v in gen_left + gen_right if v is None] assert missing_values == [], missing_values #pad both tables to equal number of rows if gen_right: if len(gen_right) < len(gen_left): #fill up with blank lines to same length gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right)) elif len(gen_right) > len(gen_left): #fill up with blank lines to same length, just to keep it symmetric gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left)) #padding in SimpleTable doesn't work like I want #force extra spacing and exact string length in right table gen_right = [('%-21s' % (' '+k), v) for k,v in gen_right] gen_stubs_right, gen_data_right = zip_longest(*gen_right) #transpose row col gen_table_right = SimpleTable(gen_data_right, gen_header, gen_stubs_right, title = gen_title, txt_fmt = fmt_2cols #gen_fmt ) else: gen_table_right = [] #because .extend_right seems works with [] #moved below so that we can pad if needed to match length of gen_right #transpose rows and columns, `unzip` gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col gen_table_left = SimpleTable(gen_data_left, gen_header, gen_stubs_left, title = gen_title, txt_fmt = fmt_2cols ) gen_table_left.extend_right(gen_table_right) general_table = gen_table_left return general_table #, gen_table_left, gen_table_right
def summary(self): """ Constructs a summary of the results from a fit model. Returns ------- summary : Summary instance Object that contains tables and facilitated export to text, html or latex """ # Summary layout # 1. Overall information # 2. Mean parameters # 3. Volatility parameters # 4. Distribution parameters # 5. Notes model = self.model model_name = model.name + " - " + model.volatility.name # Summary Header top_left = [ ("Dep. Variable:", self._dep_name), ("Mean Model:", model.name), ("Vol Model:", model.volatility.name), ("Distribution:", model.distribution.name), ("Method:", "User-specified Parameters"), ("", ""), ("Date:", self._datetime.strftime("%a, %b %d %Y")), ("Time:", self._datetime.strftime("%H:%M:%S")), ] top_right = [ ("R-squared:", "--"), ("Adj. R-squared:", "--"), ("Log-Likelihood:", "%#10.6g" % self.loglikelihood), ("AIC:", "%#10.6g" % self.aic), ("BIC:", "%#10.6g" % self.bic), ("No. Observations:", self._nobs), ("", ""), ("", ""), ] title = model_name + " Model Results" stubs = [] vals = [] for stub, val in top_left: stubs.append(stub) vals.append([val]) table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs) # create summary table instance smry = Summary() # Top Table # Parameter table fmt = fmt_2cols fmt["data_fmts"][1] = "%18s" top_right = [("%-21s" % (" " + k), v) for k, v in top_right] stubs = [] vals = [] for stub, val in top_right: stubs.append(stub) vals.append([val]) table.extend_right(SimpleTable(vals, stubs=stubs)) smry.tables.append(table) stubs = self._names header = ["coef"] vals = (self.params,) formats = [(10, 4)] pos = 0 param_table_data = [] for _ in range(len(vals[0])): row = [] for i, val in enumerate(vals): if isinstance(val[pos], np.float64): converted = format_float_fixed(val[pos], *formats[i]) else: converted = val[pos] row.append(converted) pos += 1 param_table_data.append(row) mc = self.model.num_params vc = self.model.volatility.num_params dc = self.model.distribution.num_params counts = (mc, vc, dc) titles = ("Mean Model", "Volatility Model", "Distribution") total = 0 for title, count in zip(titles, counts): if count == 0: continue table_data = param_table_data[total : total + count] table_stubs = stubs[total : total + count] total += count table = SimpleTable(table_data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title) smry.tables.append(table) extra_text = ( "Results generated with user-specified parameters.", "Since the model was not estimated, there are no std. " "errors.", ) smry.add_extra_txt(extra_text) return smry
def summary(self): """ Constructs a summary of the results from a fit model. Returns ------- summary : Summary instance Object that contains tables and facilitated export to text, html or latex """ # Summary layout # 1. Overall information # 2. Mean parameters # 3. Volatility parameters # 4. Distribution parameters # 5. Notes model = self.model model_name = model.name + ' - ' + model.volatility.name # Summary Header top_left = [('Dep. Variable:', self._dep_name), ('Mean Model:', model.name), ('Vol Model:', model.volatility.name), ('Distribution:', model.distribution.name), ('Method:', 'User-specified Parameters'), ('', ''), ('Date:', self._datetime.strftime('%a, %b %d %Y')), ('Time:', self._datetime.strftime('%H:%M:%S'))] top_right = [('R-squared:', '--'), ('Adj. R-squared:', '--'), ('Log-Likelihood:', '%#10.6g' % self.loglikelihood), ('AIC:', '%#10.6g' % self.aic), ('BIC:', '%#10.6g' % self.bic), ('No. Observations:', self._nobs), ('', ''), ('', ''),] title = model_name + ' Model Results' stubs = [] vals = [] for stub, val in top_left: stubs.append(stub) vals.append([val]) table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs) # create summary table instance smry = Summary() # Top Table # Parameter table fmt = fmt_2cols fmt['data_fmts'][1] = '%18s' top_right = [('%-21s' % (' ' + k), v) for k, v in top_right] stubs = [] vals = [] for stub, val in top_right: stubs.append(stub) vals.append([val]) table.extend_right(SimpleTable(vals, stubs=stubs)) smry.tables.append(table) stubs = self._names header = ['coef'] vals = (self.params,) formats = [(10, 4)] pos = 0 param_table_data = [] for _ in range(len(vals[0])): row = [] for i, val in enumerate(vals): if isinstance(val[pos], np.float64): converted = format_float_fixed(val[pos], *formats[i]) else: converted = val[pos] row.append(converted) pos += 1 param_table_data.append(row) mc = self.model.num_params vc = self.model.volatility.num_params dc = self.model.distribution.num_params counts = (mc, vc, dc) titles = ('Mean Model', 'Volatility Model', 'Distribution') total = 0 for title, count in zip(titles, counts): if count == 0: continue table_data = param_table_data[total:total + count] table_stubs = stubs[total:total + count] total += count table = SimpleTable(table_data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title) smry.tables.append(table) extra_text = ('Results generated with user-specified parameters.', 'Since the model was not estimated, there are no std. ' 'errors.') smry.add_extra_txt(extra_text) return smry
def summary(self, yname=None, xname=None, title=0, alpha=.05, returns='text', model_info=None): """ Parameters ----------- yname : string optional, Default is `Y` xname : list of strings optional, Default is `X.#` for # in p the number of regressors Confidance interval : (0,1) not implimented title : string optional, Defualt is 'Generalized linear model' returns : string 'text', 'table', 'csv', 'latex', 'html' Returns ------- Defualt : returns='print' Prints the summarirized results Option : returns='text' Prints the summarirized results Option : returns='table' SimpleTable instance : summarizing the fit of a linear model. Option : returns='csv' returns a string of csv of the results, to import into a spreadsheet Option : returns='latex' Not implimented yet Option : returns='HTML' Not implimented yet Examples (needs updating) -------- >>> import statsmodels as sm >>> data = sm.datasets.longley.load() >>> data.exog = sm.add_constant(data.exog) >>> ols_results = sm.OLS(data.endog, data.exog).results >>> print ols_results.summary() ... Notes ----- conf_int calculated from normal dist. """ import time as time #TODO Make sure all self.model.__class__.__name__ are listed model_types = {'OLS' : 'Ordinary least squares', 'GLS' : 'Generalized least squares', 'GLSAR' : 'Generalized least squares with AR(p)', 'WLS' : 'Weigthed least squares', 'RLM' : 'Robust linear model', 'GLM' : 'Generalized linear model' } model_methods = {'OLS' : 'Least Squares', 'GLS' : 'Least Squares', 'GLSAR' : 'Least Squares', 'WLS' : 'Least Squares', 'RLM' : '?', 'GLM' : '?' } if title==0: title = model_types[self.model.__class__.__name__] if yname is None: try: yname = self.model.endog_names except AttributeError: yname = 'y' if xname is None: try: xname = self.model.exog_names except AttributeError: xname = ['var_%d' % i for i in range(len(self.params))] time_now = time.localtime() time_of_day = [time.strftime("%H:%M:%S", time_now)] date = time.strftime("%a, %d %b %Y", time_now) modeltype = self.model.__class__.__name__ #dist_family = self.model.family.__class__.__name__ nobs = self.nobs df_model = self.df_model df_resid = self.df_resid #General part of the summary table, Applicable to all? models #------------------------------------------------------------ #TODO: define this generically, overwrite in model classes #replace definition of stubs data by single list #e.g. gen_left = [('Model type:', [modeltype]), ('Date:', [date]), ('Dependent Variable:', yname), #What happens with multiple names? ('df model', [df_model]) ] gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col gen_title = title gen_header = None ## gen_stubs_left = ('Model type:', ## 'Date:', ## 'Dependent Variable:', ## 'df model' ## ) ## gen_data_left = [[modeltype], ## [date], ## yname, #What happens with multiple names? ## [df_model] ## ] gen_table_left = SimpleTable(gen_data_left, gen_header, gen_stubs_left, title = gen_title, txt_fmt = gen_fmt ) gen_stubs_right = ('Method:', 'Time:', 'Number of Obs:', 'df resid' ) gen_data_right = ([modeltype], #was dist family need to look at more time_of_day, [nobs], [df_resid] ) gen_table_right = SimpleTable(gen_data_right, gen_header, gen_stubs_right, title = gen_title, txt_fmt = gen_fmt ) gen_table_left.extend_right(gen_table_right) general_table = gen_table_left #Parameters part of the summary table #------------------------------------ #Note: this is not necessary since we standardized names, only t versus normal tstats = {'OLS' : self.t(), 'GLS' : self.t(), 'GLSAR' : self.t(), 'WLS' : self.t(), 'RLM' : self.t(), 'GLM' : self.t() } prob_stats = {'OLS' : self.pvalues, 'GLS' : self.pvalues, 'GLSAR' : self.pvalues, 'WLS' : self.pvalues, 'RLM' : self.pvalues, 'GLM' : self.pvalues } #Dictionary to store the header names for the parameter part of the #summary table. look up by modeltype alp = str((1-alpha)*100)+'%' param_header = { 'OLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'WLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLM' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution 'RLM' : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval'] #checke z } params_stubs = xname params = self.params conf_int = self.conf_int(alpha) std_err = self.bse exog_len = xrange(len(xname)) tstat = tstats[modeltype] prob_stat = prob_stats[modeltype] # Simpletable should be able to handle the formating params_data = zip(["%#6.4g" % (params[i]) for i in exog_len], ["%#6.4f" % (std_err[i]) for i in exog_len], ["%#6.4f" % (tstat[i]) for i in exog_len], ["%#6.4f" % (prob_stat[i]) for i in exog_len], ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in \ exog_len] ) parameter_table = SimpleTable(params_data, param_header[modeltype], params_stubs, title = None, txt_fmt = fmt_2, #gen_fmt, ) #special table #------------- #TODO: exists in linear_model, what about other models #residual diagnostics #output options #-------------- #TODO: JP the rest needs to be fixed, similar to summary in linear_model def ols_printer(): """ print summary table for ols models """ table = str(general_table)+'\n'+str(parameter_table) return table def ols_to_csv(): """ exports ols summary data to csv """ pass def glm_printer(): table = str(general_table)+'\n'+str(parameter_table) return table pass printers = {'OLS': ols_printer, 'GLM' : glm_printer } if returns=='print': try: return printers[modeltype]() except KeyError: return printers['OLS']()
def summary(self): """ Constructs a summary of the results from a fit model. Returns ------- summary : Summary instance Object that contains tables and facilitated export to text, html or latex """ # Summary layout # 1. Overall information # 2. Mean parameters # 3. Volatility parameters # 4. Distribution parameters # 5. Notes model = self.model model_name = model.name + ' - ' + model.volatility.name # Summary Header top_left = [('Dep. Variable:', self._dep_name), ('Mean Model:', model.name), ('Vol Model:', model.volatility.name), ('Distribution:', model.distribution.name), ('Method:', 'Maximum Likelihood'), ('', ''), ('Date:', self._datetime.strftime('%a, %b %d %Y')), ('Time:', self._datetime.strftime('%H:%M:%S'))] top_right = [('R-squared:', '%#8.3f' % self.rsquared), ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj), ('Log-Likelihood:', '%#10.6g' % self.loglikelihood), ('AIC:', '%#10.6g' % self.aic), ('BIC:', '%#10.6g' % self.bic), ('No. Observations:', self._nobs), ('Df Residuals:', self.nobs - self.num_params), ('Df Model:', self.num_params)] title = model_name + ' Model Results' stubs = [] vals = [] for stub, val in top_left: stubs.append(stub) vals.append([val]) table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs) # create summary table instance smry = Summary() # Top Table # Parameter table fmt = fmt_2cols fmt['data_fmts'][1] = '%18s' top_right = [('%-21s' % (' ' + k), v) for k, v in top_right] stubs = [] vals = [] for stub, val in top_right: stubs.append(stub) vals.append([val]) table.extend_right(SimpleTable(vals, stubs=stubs)) smry.tables.append(table) conf_int = np.asarray(self.conf_int()) conf_int_str = [] for c in conf_int: conf_int_str.append('[' + format_float_fixed(c[0], 7, 3) + ',' + format_float_fixed(c[1], 7, 3) + ']') stubs = self._names header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.'] vals = (self.params, self.std_err, self.tvalues, self.pvalues, conf_int_str) formats = [(10, 4), (9, 3), (9, 3), (9, 3), None] pos = 0 param_table_data = [] for _ in range(len(vals[0])): row = [] for i, val in enumerate(vals): if isinstance(val[pos], np.float64): converted = format_float_fixed(val[pos], *formats[i]) else: converted = val[pos] row.append(converted) pos += 1 param_table_data.append(row) mc = self.model.num_params vc = self.model.volatility.num_params dc = self.model.distribution.num_params counts = (mc, vc, dc) titles = ('Mean Model', 'Volatility Model', 'Distribution') total = 0 for title, count in zip(titles, counts): if count == 0: continue table_data = param_table_data[total:total + count] table_stubs = stubs[total:total + count] total += count table = SimpleTable(table_data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title) smry.tables.append(table) extra_text = ['Covariance estimator: ' + self.cov_type] if self.convergence_flag: extra_text.append(""" WARNING: The optimizer did not indicate sucessful convergence. The message was {string_message}. See convergence_flag.""".format( string_message=self._optim_output[-1])) smry.add_extra_txt(extra_text) return smry