Ejemplo n.º 1
0
    def summary_find_nfact(self):
        '''build a text report for the selection of the number of factors

        If ``results_find_nfact`` is not yet attached to the instance,
        ``fit_find_nfact`` is called first.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()

        criteria = self.results_find_nfact
        lines = ['Best result for k, by AIC, BIC, R2_adj, L1O']
        lines.append(' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact))

        from statsmodels.iolib.table import SimpleTable

        # one integer column for k followed by four float columns
        column_formats = ['%6d'] + ['%10.3f'] * 4
        criteria_table = SimpleTable(criteria,
                                     'k, AIC, BIC, R2_adj, L1O'.split(', '),
                                     None,
                                     txt_fmt=dict(data_fmts=column_formats))

        lines.extend([
            "PCA regression on simulated data,",
            "DGP: 2 factors and 4 explanatory variables",
            criteria_table.__str__(),
            "Notes: k is number of components of PCA,",
            "       constant is added additionally",
            "       k=0 means regression on constant only",
            "       L1O: sum of squared prediction errors for leave-one-out",
        ])
        # every line is preceded by a newline, including the first one
        return ''.join('\n' + line for line in lines)
Ejemplo n.º 2
0
    def test_simple_table_3(self):
        # Test SimpleTable.extend() as in extend down
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        assert_equal(desired, str(actual))
Ejemplo n.º 3
0
def _df_to_simpletable(
    df,
    align="r",
    float_format="%.4f",
    header=True,
    index=True,
    table_dec_above="-",
    table_dec_below=None,
    header_dec_below="-",
    pad_col=0,
    pad_index=0,
):
    """Convert a DataFrame into a SimpleTable.

    Parameters
    ----------
    df : DataFrame
        Data to render; it is copied, the input is not modified.
    align : str
        Value stored as ``data_aligns`` for the latex and txt output formats.
    float_format : str
        Passed together with every cell to ``_formatter``.
    header : bool
        If True the column labels (as strings) become the table headers.
    index : bool
        If True the index labels (as strings) become the row stubs;
        otherwise the first column is converted to padded strings and no
        stubs are used.
    table_dec_above, table_dec_below, header_dec_below : str or None
        Stored under the same keys in the txt output format.
    pad_col : int
        The txt column separator is ``pad_col + 1`` spaces.
    pad_index : int
        Number of spaces appended to each stub (or first-column entry).

    Returns
    -------
    SimpleTable
    """
    dat = df.copy()
    dat = dat.applymap(lambda x: _formatter(x, float_format))
    if header:
        headers = [str(x) for x in dat.columns.tolist()]
    else:
        headers = None
    if index:
        stubs = [str(x) + int(pad_index) * " " for x in dat.index.tolist()]
    else:
        # positional access to the first column; ``.ix`` was removed from
        # pandas, ``.iloc`` is the positional indexer
        dat.iloc[:, 0] = [str(x) + int(pad_index) * " " for x in dat.iloc[:, 0]]
        stubs = None
    st = SimpleTable(
        np.array(dat), headers=headers, stubs=stubs, ltx_fmt=fmt_latex, txt_fmt=fmt_txt
    )
    st.output_formats["latex"]["data_aligns"] = align
    st.output_formats["txt"]["data_aligns"] = align
    st.output_formats["txt"]["table_dec_above"] = table_dec_above
    st.output_formats["txt"]["table_dec_below"] = table_dec_below
    st.output_formats["txt"]["header_dec_below"] = header_dec_below
    st.output_formats["txt"]["colsep"] = " " * int(pad_col + 1)
    return st
Ejemplo n.º 4
0
def _df_to_simpletable(df,
                       align='r',
                       float_format="%.4f",
                       header=True,
                       index=True,
                       table_dec_above='-',
                       table_dec_below=None,
                       header_dec_below='-',
                       pad_col=0,
                       pad_index=0):
    """Render a DataFrame as a SimpleTable.

    Every cell of a copy of ``df`` is passed through ``_formatter`` with
    ``float_format``.  Column labels become headers when ``header`` is true.
    Index labels become stubs when ``index`` is true; otherwise the first
    column is turned into padded strings and the table has no stubs.  The
    remaining arguments are written into the table's txt / latex output
    formats.
    """
    formatted = df.copy().applymap(lambda x: _formatter(x, float_format))

    headers = ([str(label) for label in formatted.columns.tolist()]
               if header else None)

    stubs = None
    if index:
        stubs = [str(label) + int(pad_index) * ' '
                 for label in formatted.index.tolist()]
    else:
        first_column = [str(value) + int(pad_index) * ' '
                        for value in formatted.iloc[:, 0]]
        formatted.iloc[:, 0] = first_column

    table = SimpleTable(np.array(formatted),
                        headers=headers,
                        stubs=stubs,
                        ltx_fmt=fmt_latex,
                        txt_fmt=fmt_txt)

    table.output_formats['latex']['data_aligns'] = align
    txt_options = table.output_formats['txt']
    txt_options['data_aligns'] = align
    txt_options['table_dec_above'] = table_dec_above
    txt_options['table_dec_below'] = table_dec_below
    txt_options['header_dec_below'] = header_dec_below
    # column separator is one space wider than the requested padding
    txt_options['colsep'] = ' ' * int(pad_col + 1)
    return table
Ejemplo n.º 5
0
    def acorr_estimates(self):
        """
        Build the "Estimates of Autocorrelations" table (SAS-like layout).

        Each row holds the lag, the autocovariance, the autocorrelation and a
        41 character text bar: 20 cells left of the ``|`` axis for negative
        values and 20 cells right of it for non-negative values.
        """
        half_width = 20

        def _bar(rho):
            # stars are |rho| scaled to the half width and rounded; rho == 1
            # therefore fills the whole right half
            stars = int(np.abs(np.round(rho * half_width, 0)))
            if rho < 0:
                return (' ' * (half_width - stars) + '*' * stars + "|" +
                        " " * half_width)
            return (' ' * half_width + "|" + '*' * stars +
                    " " * (half_width - stars))

        bars = [_bar(rho) for rho in self.estimated_acorr]

        lags = np.arange(0, self.ar + 1)
        rows = list(zip(lags, self.estimated_acov, self.estimated_acorr,
                        bars))
        axis_header = "-1" + " " * 18 + "0" + " " * 19 + "1"
        column_names = ["Lag", "Autocovariance", "Autocorrelation",
                        axis_header]
        return SimpleTable(rows, column_names,
                           title="Estimates of Autocorrelations")
Ejemplo n.º 6
0
    def test_customlabel(self):
        # Limited test of custom custom labeling
        tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1)
        tbl[1][1].data = np.nan
        tbl.label_cells(custom_labeller)
        #print([[c.datatype for c in row] for row in tbl])
        desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    --   *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
        actual = '\n%s\n' % tbl.as_text(missing='--')
        self.assertEqual(actual, desired)
Ejemplo n.º 7
0
def sm_summary_restrict(summary, dropFE=False, alpha=None):
    '''
    Drops fixed effects or nonsignificant predictors. Can do both simultaneously
    input:
        * summary - statsmodels summary() object (i.e. smf.ols(formula).fit().summary() )
        * dropFE - True/False, default is False - drop fixed effects (any predictor with 'C(' in name)
        * alpha - Float, rows are kept only if their p-value is below alpha
          (i.e. if predictor is 0.08 and alpha=0.05, predictor will not be included)
    output:
        * summary - the same summary object, with tables[1] replaced by the
          restricted coefficient table (the input object is modified in place)
    '''
    from statsmodels.iolib.table import SimpleTable
    coef_table = summary.tables[1]
    tblheader = coef_table.data[0]

    # Locate the p-value column from the header ('P>|t|' or 'P>|z|') rather
    # than by a fixed position: the last two columns of the coefficient table
    # are the confidence bounds, so r[-2] is the lower bound, not the p-value.
    # Fall back to the historical position when no such header is present.
    pcol = next((i for i, name in enumerate(tblheader)
                 if str(name).strip().startswith('P>')), -2)

    tbldata = []
    for r in coef_table.data[1:]:
        if dropFE and 'C(' in r[0]:
            continue
        if alpha is not None and not float(r[pcol]) < alpha:
            continue
        tbldata.append(r)
    # Add back in our restricted table
    summary.tables[1] = SimpleTable(tbldata, tblheader)
    return summary
Ejemplo n.º 8
0
    def summary(self) -> SimpleTable:
        """
        Summary table of the descriptive statistics

        Missing values in ``self.frame`` are shown as empty strings, whole
        numbers are shown as integers and all other values with four
        significant digits.

        Returns
        -------
        SimpleTable
            A table instance supporting export to text, csv and LaTeX
        """
        df = self.frame.astype(object)
        df = df.fillna("")
        cols = [str(col) for col in df.columns]
        stubs = [str(idx) for idx in df.index]
        data = [list(row) for _, row in df.iterrows()]

        def _formatter(v):
            # strings (including the "" placeholders) pass through unchanged
            if isinstance(v, str):
                return v
            elif v // 1 == v:
                return str(int(v))
            return f"{v:0.4g}"

        return SimpleTable(
            data,
            # the keyword is ``headers``; ``header`` is not the header
            # argument, so the column labels were not used as headers
            headers=cols,
            stubs=stubs,
            title="Descriptive Statistics",
            txt_fmt={"data_fmts": {0: "%s", 1: _formatter}},
            datatypes=[1] * len(data),
        )
Ejemplo n.º 9
0
    def _param_table(
        params: NDArray,
        se: NDArray,
        tstats: NDArray,
        pvalues: NDArray,
        stubs: Sequence[str],
        title: str,
    ) -> SimpleTable:
        """Build the parameter table with normal-based 95% interval bounds.

        Columns are parameter, standard error, t-stat, p-value and the lower
        and upper bounds ``params + se * norm.ppf([0.025, 0.975])``.  The
        p-value column is formatted with ``pval_format``, all other columns
        with ``str_format``.
        """
        quantiles = stats.norm.ppf([[0.025, 0.975]])
        bounds = params[:, None] + se[:, None] * quantiles
        columns = np.column_stack([params, se, tstats, pvalues, bounds])

        pvalue_col = 3
        formatted = [
            [pval_format(v) if col == pvalue_col else str_format(v)
             for col, v in enumerate(row)]
            for row in columns
        ]

        column_titles = [
            "Parameter", "Std. Err.", "T-stat", "P-value", "Lower CI",
            "Upper CI"
        ]
        return SimpleTable(formatted,
                           stubs=stubs,
                           txt_fmt=fmt_params,
                           headers=column_titles,
                           title=title)
Ejemplo n.º 10
0
    def test_customlabel(self):
        # Limited test of custom custom labeling
        tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1)
        tbl[1][1].data = np.nan
        tbl.label_cells(custom_labeller)
        #print([[c.datatype for c in row] for row in tbl])
        desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    --   *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
        actual = '\n%s\n' % tbl.as_text(missing='--')
        assert_equal(actual, desired)
Ejemplo n.º 11
0
    def display(self):
        """Return an HTML table describing the dataset.

        Rows: current date and time, the ``dt`` and ``h`` sizes of the
        target, the reference metric name (if set), and hashes of the target,
        of every prediction and of the reference (if present).
        """
        timestamp = datetime.now()
        prediction_hashes = [
            self._build_xarray_hash(pred) for pred in self.preds
        ]

        # (stub, [cell]) pairs in display order
        rows = [
            ("Date:", [timestamp.strftime(self.date_format)]),
            ("Time:", [timestamp.strftime(self.time_format)]),
            ("No. Timepoints:", [self.target.sizes["dt"]]),
            ("Horizon", [self.target.sizes["h"]]),
        ]

        if self.ref_name:
            rows.append(("Reference Metric", [self.ref_name]))

        rows.append(("Target Hash", [self._build_xarray_hash(self.target)]))

        rows.extend((f'"{name}" Hash', [digest])
                    for name, digest in zip(self.names, prediction_hashes))

        if self.ref is not None:
            rows.append(("Reference Metric Hash",
                         [self._build_xarray_hash(self.ref)]))

        table = SimpleTable(
            data=[cells for _, cells in rows],
            stubs=[label for label, _ in rows],
            title="Dataset Description",
        )
        return table.as_html()
Ejemplo n.º 12
0
    def summary(self):
        """Generates a summary table with basic statistics for each submodel.

        Each submodel is refit and contributes one row: model formula, number
        of observations, adjusted r^2 and the F-test p-value.

        TODO: improve interface with linearmodel, so that this doesn't rely on
        private methods.
        """
        summary = Summary()

        def _row(submodel):
            # the fit is taken from the wrapped (private) model object
            fit = submodel._model._model.fit()
            return [
                submodel._model.get_model_formula(),
                round(fit.nobs),
                round(fit.rsquared_adj, 2),
                format(fit.f_pvalue, '.1E'),
            ]

        rows = [_row(submodel) for submodel in self._models]

        column_titles = ['Model form', 'Observations', 'Adjusted r^2',
                         'P value']
        summary.tables.append(SimpleTable(data=rows, headers=column_titles))

        return summary
Ejemplo n.º 13
0
    def test_SimpleTable_special_chars(self):
        # Simple table with characters: (%, >, |, _, $, &, #)
        cell0c_data = 22
        cell1c_data = 1053
        row0c_data = [cell0c_data, cell1c_data]
        row1c_data = [23, 6250.4]
        table1c_data = [row0c_data, row1c_data]
        test1c_stubs = ('>stub1%', 'stub_2')
        test1c_header = ('#header1$', 'header&|')
        tbl_c = SimpleTable(table1c_data,
                            test1c_header,
                            test1c_stubs,
                            ltx_fmt=ltx_fmt1)

        def test_ltx_special_chars(self):
            # Test for special characters (latex) in headers and stubs
            desired = r"""
\begin{tabular}{lcc}
\toprule
                    & \textbf{\#header1\$} & \textbf{header\&$|$}  \\
\midrule
\textbf{$>$stub1\%} &          22          &         1053          \\
\textbf{stub\_2}    &          23          &        6250.4         \\
\bottomrule
\end{tabular}
"""
            actual = '\n%s\n' % tbl_c.as_latex_tabular(center=False)
            self.assertEqual(actual, desired)

        test_ltx_special_chars(self)
Ejemplo n.º 14
0
def feat_imp(cols, fi):
    '''
    Show feature importances as a table and as a bar plot.

    Input:
    1) cols - list of feature names
    2) fi - np.array of feature importances

    Output (printed / plotted, nothing is returned):
    1) Table with features and their importances, sorted by descending
       importance;
    2) Visualization as a bar plot in the same order.
    '''

    import numpy as np
    from statsmodels.iolib.table import SimpleTable

    fi = np.round(fi, 3)
    indices = np.argsort(fi)[::-1]
    cols = [cols[i] for i in indices]
    # reorder the importances with the same permutation as the names so that
    # every table row pairs a feature with its own importance
    fi_sorted = fi[indices]
    n_features = fi.shape[0]

    print(SimpleTable(np.append([cols], [fi_sorted], axis=0).T,
                      ['Feature', 'Importance']))

    all_colors = list(plt.cm.colors.cnames.keys())
    # distinct colours while enough names exist; sampling without
    # replacement raises once there are more features than colours
    c = np.random.choice(all_colors, n_features,
                         replace=n_features > len(all_colors))

    plt.figure()
    plt.title('Feature importances')
    plt.bar(range(n_features), fi_sorted, color=c, width=.5)
    plt.xticks(range(n_features), cols, rotation=45)
    plt.show()
Ejemplo n.º 15
0
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list[str]
        additional attributes to add below a parameter row, e.g. bse or tvalues
    endog_names : {list[str], None}
        names for rows of the parameter array (multivariate endog)
    exog_names : {list[str], None}
        names for columns of the parameter array (exog); defaults to
        ``var0, var1, ...`` with one name per column of ``result.params``
    title : None or string

    Returns
    -------
    table : SimpleTable
        one row per row of the parameter array; if ``extras`` is given, each
        parameter row is followed by one row per extra attribute with the
        values shown in parentheses

    '''
    if endog_names is None:
        # TODO: note the [1:] is specific to current MNLogit
        endog_names = ['endog_%d' % i for i in
                       np.unique(result.model.endog)[1:]]
    if exog_names is None:
        # headers label the columns, so there must be one name per column
        # (same count as the data formats below), not one per row
        exog_names = ['var%d' % i for i in range(result.params.shape[1])]

    # TODO: check formatting options with different values
    res_params = [[forg(item, prec=4) for item in row] for row in result.params]
    if extras:
        extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                         for v in col]
                        for col in getattr(result, what)]
                       for what in extras
                       ]
        # interleave: parameter row, then the matching row of every extra
        data = lzip(res_params, *extras_list)
        data = [i for j in data for i in j]  # flatten
        # the extra rows get empty stubs
        stubs = lzip(endog_names, *[['']*len(endog_names)]*len(extras))
        stubs = [i for j in stubs for i in j]  # flatten
    else:
        data = res_params
        stubs = endog_names

    # cells are already formatted strings, so every column uses "%s"
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt["data_fmts"] = ["%s"]*result.params.shape[1]

    return SimpleTable(data, headers=exog_names,
                             stubs=stubs,
                             title=title,
                             txt_fmt=txt_fmt)
Ejemplo n.º 16
0
    def summary_table(self, float_fmt="%6.3f"):
        '''create a summary table with all influence and outlier measures

        This does currently not distinguish between statistics that can be
        calculated from the original regression results and for which a
        leave-one-observation-out loop is needed

        Parameters
        ----------
        float_fmt : str
            format used for every column except the first (observation
            index) column

        Returns
        -------
        res : SimpleTable instance
           SimpleTable instance with the results, can be printed

        Notes
        -----
        This also attaches table_data to the instance.

        '''
        # (column title, values) pairs in display order
        measures = [
            ('obs', np.arange(self.nobs)),
            ('endog', self.endog),
            ('fitted\nvalue', self.results.fittedvalues),
            ("Cook's\nd", self.cooks_distance[0]),
            ("student.\nresidual", self.resid_studentized_internal),
            ('hat diag', self.hat_matrix_diag),
            ('dffits \ninternal', self.dffits_internal[0]),
            ("ext.stud.\nresidual", self.resid_studentized_external),
            ('dffits', self.dffits[0]),
            # only column 1 of dfbetas is shown
            ('dfbeta\nslope', self.dfbetas[:, 1]),
        ]
        colnames = tuple(label for label, _ in measures)
        values = np.column_stack([column for _, column in measures])
        self.table_data = values

        from statsmodels.iolib.table import SimpleTable, default_html_fmt
        from statsmodels.iolib.tableformatting import fmt_base
        from copy import deepcopy

        # work on copies so the shared default formats stay untouched
        text_format = deepcopy(fmt_base)
        html_format = deepcopy(default_html_fmt)
        text_format['data_fmts'] = (["%4d"] +
                                    [float_fmt] * (values.shape[1] - 1))
        return SimpleTable(values,
                           headers=colnames,
                           txt_fmt=text_format,
                           html_fmt=html_format)
Ejemplo n.º 17
0
    def test_SimpleTable_1(self):
        # Basic test, test_SimpleTable_1
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999],[1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data, test1header, test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        self.assertEqual(desired, str(actual))
Ejemplo n.º 18
0
    def test_simple_table_1(self):
        # Basic test, test_simple_table_1
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999],[1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data, test1header, test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        assert_equal(desired, str(actual))
Ejemplo n.º 19
0
    def test_default_alignment(self):
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312    2.73
stub2 1.95038     2.6
---------------------
'''
        test1data = [[1.30312, 2.73], [1.95038, 2.6]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data,
                             test1header,
                             test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        assert_equal(desired, str(actual))
Ejemplo n.º 20
0
    def summary_quantiles(self,
                          idx,
                          distppf,
                          frac=[0.01, 0.025, 0.05, 0.1, 0.975],
                          varnames=None,
                          title=None):
        '''summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            called with the quantile probabilities as a column array; its
            result is shown in the 'dist' columns next to the Monte Carlo
            quantiles
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : array_like, float
            probabilities passed to ``self.quantiles``
        varnames : None, or list of strings
            optional list of variable names, same length as idx
        title : None or string
            optional prefix for the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results

        '''
        idx = np.atleast_1d(idx)  # make sure idx can be iterated

        quant, mcq = self.quantiles(idx, frac=frac)
        # not sure whether this will work with single quantile
        crit = distppf(np.atleast_2d(quant).T)

        # probability column, then a (mc, dist) column pair per variable
        columns = [quant]
        for pos in range(len(idx)):
            columns += [mcq[:, pos], crit[:, pos]]
        mmlar = np.column_stack(columns)

        suffix = 'Quantiles (critical values)'
        title = title + ' ' + suffix if title else suffix

        # TODO use stub instead
        if varnames is None:
            varnames = ['var%d' % i for i in range(mmlar.shape[1] // 2)]
        headers = ['\nprob']
        for name in varnames:
            for kind in ['mc', 'dist']:
                headers.append('%s\n%s' % (name, kind))

        data_fmts = ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
        return SimpleTable(mmlar,
                           txt_fmt={'data_fmts': data_fmts},
                           title=title,
                           headers=headers)
Ejemplo n.º 21
0
    def summary_news(self, sparsify=True):
        """
        Create summary table showing news from new data since previous results

        Parameters
        ----------
        sparsify : bool, optional, default True
            Set to False for the table to include every one of the multiindex
            keys at each row.

        Returns
        -------
        updates_table : SimpleTable
            Table showing new datapoints that were not in the previous results'
            data. Columns are:

            - `update date` : date associated with a new data point.
            - `updated variable` : variable for which new data was added at
              `update date`.
            - `forecast (prev)` : the forecast value for the updated variable
              at the update date in the previous results object (i.e. prior to
              the data being available).
            - `observed` : the observed value of the new datapoint.

        See Also
        --------
        data_updates
        """
        key_columns = ['update date', 'updated variable']

        merged = pd.merge(self.data_updates,
                          self.news,
                          left_index=True,
                          right_index=True)
        frame = merged.sort_index().reset_index()

        # keys become plain strings, everything else two-decimal strings
        # (blank when missing)
        frame[key_columns] = frame[key_columns].applymap(str)
        frame.iloc[:, 2:] = frame.iloc[:, 2:].applymap(
            lambda num: '' if pd.isnull(num) else '%.2f' % num)

        # Sparsify the date column: blank a date that repeats the row above
        if sparsify:
            repeated = frame['update date'] == frame['update date'].shift(1)
            frame.loc[repeated, 'update date'] = ''

        return SimpleTable(frame.values,
                           frame.columns.tolist(),
                           None,
                           txt_fmt=fmt_params,
                           title='News from updated observations:')
Ejemplo n.º 22
0
    def test_SimpleTable_3(self):
        # Test SimpleTable.extend() as in extend down
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        self.assertEqual(desired, str(actual))
Ejemplo n.º 23
0
    def summary_cdf(self, idx, frac, crit, varnames=None, title=None):
        '''summary table for cumulative density function


        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            probabilities shown in the first column of the table
        crit : array_like
            values for which cdf is calculated; column ``i`` is used for
            ``idx[i]``
        varnames : None, or sequence of strings
            optional variable names, same length as idx
        title : None or string
            optional prefix for the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results


        '''
        idx = np.atleast_1d(idx)  # make sure idx can be iterated

        # TODO: need broadcasting in cdf
        # one cdf column per requested variable; element [1] of the value
        # returned by self.cdf is used
        mml = [self.cdf(crit[:, i], [idx[i]])[1].ravel()
               for i in range(len(idx))]
        mmlar = np.column_stack([frac] + mml)

        if title:
            title = title + ' Probabilities'
        else:
            title = 'Probabilities'

        # TODO use stub instead
        n_vars = mmlar.shape[1] - 1
        if varnames is None:
            varnames = ['var%d' % i for i in range(n_vars)]
        # list() so that a tuple of names can be concatenated as well
        headers = ['prob'] + list(varnames)
        # one format per table column: the probability column plus one per
        # variable (previously sized by the number of probabilities)
        return SimpleTable(mmlar,
                           txt_fmt={
                               'data_fmts': ["%#6.3f"] + ["%#10.4f"] * n_vars
                           },
                           title=title,
                           headers=headers)
def generate_table(left_col, right_col, table_title):
    """Build a two-panel SimpleTable from (stub, data) pairs.

    Parameters
    ----------
    left_col : list of (stub, data) pairs
        Rows of the left panel.
    right_col : list of (stub, data) pairs
        Rows of the right panel; if empty, only the left panel is built.
    table_title : str
        Title passed to both panels.

    Returns
    -------
    SimpleTable
        The left panel, extended to the right with the right panel.

    Notes
    -----
    The shorter panel is padded with blank rows.  Padding is done on copies,
    so the lists passed in by the caller are not modified.
    """
    # Do not use column headers
    col_headers = None

    # work on copies: ``+=`` on the arguments would grow the caller's lists
    left_col = list(left_col)

    # Generate the right table
    if right_col:
        right_col = list(right_col)
        # Pad the shorter panel with blank rows so both have equal length
        if len(right_col) < len(left_col):
            right_col += [(' ', ' ')] * (len(left_col) - len(right_col))
        elif len(right_col) > len(left_col):
            left_col += [(' ', ' ')] * (len(right_col) - len(left_col))
        # indent the right stubs and pad them to a fixed width
        right_col = [('%-21s' % ('  ' + k), v) for k, v in right_col]

        # transpose the pairs into a stub sequence and a data sequence
        gen_stubs_right, gen_data_right = zip_longest(*right_col)
        gen_table_right = SimpleTable(gen_data_right,
                                      col_headers,
                                      gen_stubs_right,
                                      title=table_title,
                                      txt_fmt=fmt_2cols)
    else:
        # If there is no right table set the right table to empty
        gen_table_right = []

    # Generate the left table
    gen_stubs_left, gen_data_left = zip_longest(*left_col)
    gen_table_left = SimpleTable(gen_data_left,
                                 col_headers,
                                 gen_stubs_left,
                                 title=table_title,
                                 txt_fmt=fmt_2cols)

    # Merge the left and right tables to make a single table
    gen_table_left.extend_right(gen_table_right)
    return gen_table_left
Ejemplo n.º 25
0
    def test__repr_latex(self):
        desired = r"""
\begin{center}
\begin{tabular}{lcc}
\toprule
               & \textbf{header1} & \textbf{header2}  \\
\midrule
\textbf{stub1} &      5.394       &       29.3        \\
\textbf{stub2} &       343        &       34.2        \\
\bottomrule
\end{tabular}
\end{center}
"""
        testdata = [[5.394, 29.3], [343, 34.2]]
        teststubs = ('stub1', 'stub2')
        testheader = ('header1', 'header2')
        tbl = SimpleTable(testdata,
                          testheader,
                          teststubs,
                          txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % tbl._repr_latex_()
        assert_equal(actual, desired)
Ejemplo n.º 26
0
 def summary_proc(self, g):
     """
     For internal use

     Print a table of time, survival and standard error for group ``g``,
     built from ``self.ts[g]`` and ``self.results[g]``.  The title names the
     group when ``self.exog`` is set.
     """
     # identity test: ``!= None`` is evaluated element-wise when exog is an
     # array, and the truth value of the resulting array is ambiguous
     if self.exog is not None:
         myTitle = ('exog = ' + str(self.groups[g]) + '\n')
     else:
         myTitle = "Kaplan-Meier Curve"
     table = np.transpose(self.results[g])
     # prepend the time points as the first column
     table = np.c_[np.transpose(self.ts[g]), table]
     table = SimpleTable(table, headers=['Time', 'Survival', 'Std. Err'],
                         title=myTitle)
     print(table)
Ejemplo n.º 27
0
    def summary_find_nfact(self):
        '''build a text report for the selection of the number of factors

        If ``results_find_nfact`` is not yet attached to the instance,
        ``fit_find_nfact`` is called first.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()

        criteria = self.results_find_nfact
        parts = ['Best result for k, by AIC, BIC, R2_adj, L1O',
                 ' '*19 + '%5d %4d %6d %5d' % tuple(self.best_nfact)]

        from statsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                default_latex_fmt, default_html_fmt)

        # one integer column for k followed by four float columns
        number_formats = ['%6d'] + ['%10.3f']*4
        criteria_table = SimpleTable(criteria,
                                     'k, AIC, BIC, R2_adj, L1O'.split(', '),
                                     None,
                                     txt_fmt=dict(data_fmts=number_formats))

        parts += [
            "PCA regression on simulated data,",
            "DGP: 2 factors and 4 explanatory variables",
            criteria_table.__str__(),
            "Notes: k is number of components of PCA,",
            "       constant is added additionally",
            "       k=0 means regression on constant only",
            "       L1O: sum of squared prediction errors for leave-one-out",
        ]
        # every part is preceded by a newline, including the first one
        sumstr = ''
        for part in parts:
            sumstr += '\n' + part
        return sumstr
    def summary(self,
                xname=None,
                title='Summarize the Loistic Regression Results',
                alpha=.05):
        """Summarize the Regression Results

        Parameters
        ----------
        xname : list of str, optional
            Names of the parameters, used as row stubs.  Defaults to
            ``const, x_1, x_2, ...``.
        title : str
            Title of the table.
        alpha : float
            Only used to label the two confidence-bound columns; the bounds
            themselves are read from ``self.conf_int``.

        Returns
        -------
        SimpleTable
            One row per parameter with coefficient, standard error, test
            statistic, p-value, confidence bounds and vif.

        Raises
        ------
        ValueError
            If ``xname`` and the parameters differ in length.
        """
        exog_idx = lrange(len(self.params))
        params = self.params
        std_err = self.bse
        tvalues = self.tvalues
        pvalues = self.pvalues
        conf_int = self.conf_int
        try:
            vif = self.vif
        except Exception:
            # best effort: show 1 for every parameter when no vif is
            # available (a bare except would also trap KeyboardInterrupt
            # and SystemExit)
            vif = np.ones(len(self.params))

        # the statistic columns are labelled t or z depending on use_t
        stat = 't' if self.use_t else 'z'
        param_header = [
            'coef', 'std err', stat, 'P>|%s|' % stat, '[' + str(alpha / 2),
            str(1 - alpha / 2) + ']', 'vif'
        ]
        if xname is None:
            xname = ['x_%d' % i for i in range(len(self.params))]
            xname[0] = 'const'
        if len(xname) != len(params):
            raise ValueError('xnames and params do not have the same length')
        params_stubs = xname

        params_data = lzip([self.forg(params[i], 4) for i in exog_idx],
                           [self.forg(std_err[i]) for i in exog_idx],
                           [self.forg(tvalues[i]) for i in exog_idx],
                           [self.forg(pvalues[i]) for i in exog_idx],
                           [self.forg(conf_int[i, 0]) for i in exog_idx],
                           [self.forg(conf_int[i, 1]) for i in exog_idx],
                           [self.forg(vif[i]) for i in exog_idx])
        parameter_table = SimpleTable(params_data,
                                      param_header,
                                      params_stubs,
                                      title=title)

        return parameter_table
Ejemplo n.º 29
0
    def _top_table(
        top_left: Sequence[Tuple[str, str]],
        top_right: Sequence[Tuple[str, str]],
        title: str,
    ) -> SimpleTable:
        """Build the two-panel header table of a summary.

        Parameters
        ----------
        top_left : sequence of (stub, value) pairs
            Rows of the left panel.
        top_right : sequence of (stub, value) pairs
            Rows of the right panel; the stubs are indented by two spaces and
            left-justified to 21 characters.
        title : str
            Title of the (left) table.

        Returns
        -------
        SimpleTable
            The left panel extended to the right with the right panel.
        """
        # left panel: one single-cell data row per (stub, value) pair
        stubs = []
        vals = []
        for stub, val in top_left:
            stubs.append(stub)
            vals.append([val])
        table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs)

        # NOTE(review): ``fmt`` is never passed to a table below.  If
        # fmt_2cols is a plain dict, ``.copy()`` is shallow, so this
        # assignment also changes the "data_fmts" list shared with fmt_2cols
        # itself - confirm whether that side effect is what is intended.
        fmt = fmt_2cols.copy()
        fmt["data_fmts"][1] = "%18s"

        # right panel: indent and pad the stubs, then build it the same way
        top_right = [("%-21s" % ("  " + k), v) for k, v in top_right]
        stubs = []
        vals = []
        for stub, val in top_right:
            stubs.append(stub)
            vals.append([val])
        # the right panel is created without an explicit txt_fmt
        table.extend_right(SimpleTable(vals, stubs=stubs))

        return table
Ejemplo n.º 30
0
def quality_metrics(y, y_pred):
    '''
    Print quality metrics for binary classification as pretty tables.

    Three tables are printed:

    1. accuracy, precision, recall and F1, each rounded to 2 decimals;
    2. the confusion matrix as counts;
    3. the confusion matrix as percentages of the total count, rounded to
       2 decimals.

    Parameters
    ----------
    y : array-like
        Reference labels, passed as the first argument to the sklearn
        metric functions.
    y_pred : array-like
        Predicted labels, passed as the second argument.

    Returns
    -------
    None
        The tables are written with ``print``.
    '''
    from statsmodels.iolib.table import SimpleTable
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

    mv = []
    mv.append(['Accuracy', round(accuracy_score(y, y_pred), 2)])
    mv.append(['Precision', round(precision_score(y, y_pred), 2)])
    mv.append(['Recall', round(recall_score(y, y_pred), 2)])
    mv.append(['F1', round(f1_score(y, y_pred), 2)])

    # Metrics
    print(SimpleTable(mv, ['Metric', 'Value']))

    # Confusion matrix
    cm = confusion_matrix(y, y_pred)
    cmp = cm*100/cm.sum()
    # np.round_ was a deprecated alias and no longer exists in NumPy 2.0;
    # np.round is the supported spelling and behaves the same.
    cmp = np.round(cmp, 2)
    # The label row is prepended and the result transposed so that every
    # table row starts with its 'Negative_Model'/'Positive_Model' label.
    print(SimpleTable(np.append([['Negative_Model','Positive_Model']], cm, axis=0).T,
                      ['Amount','Negative_Real','Positive_Real']))
    print(SimpleTable(np.append([['Negative_Model','Positive_Model']], cmp, axis=0).T,
                      ['Percent','Negative_Real','Positive_Real']))
Ejemplo n.º 31
0
 def summary(self):
     """
     Return a one-row table with the outcome of the hypothesis test.

     The title is assembled from ``title``, ``h0``, ``conclusion_str`` and
     ``signif_str``; the single data row holds the test statistic, the
     critical value, the p-value and the degrees of freedom (as a string).

     Returns
     -------
     SimpleTable
     """
     heading = self.title + ". " + self.h0 + ". "
     heading = heading + self.conclusion_str + self.signif_str + "."

     cell_fmts = ["%#0.4g", "%#0.4g", "%#0.3F", "%s"]
     # The same dict object serves the text and the LaTeX output.
     data_fmt = {"data_fmts": cell_fmts}
     # For HTML every cell format is wrapped in its own <td> element.
     html_data_fmt = {"data_fmts": ["<td>" + f + "</td>" for f in cell_fmts]}

     row = [self.test_statistic, self.crit_value, self.pvalue, str(self.df)]
     column_names = ['Test statistic', 'Critical value', 'p-value', 'df']
     return SimpleTable(data=[row],
                        headers=column_names,
                        title=heading,
                        txt_fmt=data_fmt,
                        html_fmt=html_data_fmt,
                        ltx_fmt=data_fmt)
Ejemplo n.º 32
0
    def summary(self) -> Summary:
        """
        Summary of test, containing statistic, p-value and critical values

        Returns
        -------
        Summary
            Holds one table (test statistic, p-value, kernel, bandwidth)
            plus extra text lines with the trend description, the critical
            values, both hypotheses and the distribution order.
        """
        # Whole-number bandwidths are shown without decimals.
        if self.bandwidth != int(self.bandwidth):
            bw = f"{self.bandwidth:0.3f}"
        else:
            bw = str(int(self.bandwidth))

        rows = [
            ("Test Statistic", f"{self.stat:0.3f}"),
            ("P-value", f"{self.pvalue:0.3f}"),
            ("Kernel", f"{self.kernel}"),
            ("Bandwidth", bw),
        ]
        table = SimpleTable(
            rows,
            stubs=None,
            title=self.name,
            colwidths=18,
            datatypes=[0, 1],
            data_aligns=("l", "r"),
        )

        smry = Summary()
        smry.tables.append(table)

        # Every entry is rendered as "<value> (<level>%), "; slicing off the
        # last two characters removes the trailing ", ".
        entries = [
            f"{self.critical_values[level]:0.2f}" + " (" + str(int(level)) + "%" + ")" + ", "
            for level in self.critical_values.keys()
        ]
        cv_string = ("Critical Values: " + "".join(entries))[:-2]

        smry.add_extra_txt(
            [
                "Trend: " + TREND_DESCRIPTION[self._trend],
                cv_string,
                "Null Hypothesis: " + self.null_hypothesis,
                "Alternative Hypothesis: " + self.alternative_hypothesis,
                "Distribution Order: " + str(self.distribution_order),
            ]
        )
        return smry
Ejemplo n.º 33
0
    def print_summary(self, stats, orientation='auto'):
        """
        Build a table of the requested summary statistics.

        Parameters
        ----------
        stats : sequence of str
            Keys of ``self.univariate`` to include in the table.
        orientation : str, optional
            With 'varcols', or with 'auto' when there are fewer stubs than
            header entries, rows and columns are swapped. Any other
            combination keeps one row per entry of
            ``self.univariate['obs'][2]`` and one column per statistic.

        Returns
        -------
        SimpleTable
        """
        #TODO: need to specify a table formatting for the numbers, using default
        header = stats
        stubs = self.univariate['obs'][1]

        data = []
        for col in range(len(self.univariate['obs'][2])):
            data.append([self.univariate[name][2][col] for name in stats])

        swap = orientation == 'varcols'
        if not swap and orientation == 'auto':
            swap = len(stubs) < len(header)
        if swap:
            # transpose the data and exchange the roles of header and stubs
            data = lmap(lambda *column: list(column), *data)
            header, stubs = stubs, header

        number_fmt = dict(data_fmts=["%#8.4g"] * (len(header) - 1))
        return SimpleTable(data, header, stubs,
                           title='Summary Statistics', txt_fmt=number_fmt)
Ejemplo n.º 34
0
 def ar_params(self):
     """
     Return a table of the AR coefficient terms.

     One row is produced per lag ``1 .. self.ar`` with the coefficient
     (``self.yw_coef``), its standard error (``self.yw_std``) and their
     ratio as the t value.

     Returns
     -------
     SimpleTable
     """
     lags = np.arange(1, self.ar + 1)
     t_values = self.yw_coef / self.yw_std
     rows = list(zip(lags, self.yw_coef, self.yw_std, t_values))
     column_names = ["Lag", "Coefficient", "Standard Error", "t Value"]
     return SimpleTable(rows,
                        column_names,
                        title="Estimates of Autoregressive Parameters")
Ejemplo n.º 35
0
    def summary(self):
        """
        Constructs a summary of the results from a fit model.

        The summary starts with a two-column header table (model description
        on the left, fit statistics on the right). It is followed by one
        parameter table for each of the mean model, the volatility model and
        the distribution that has at least one parameter, and ends with a
        note naming the covariance estimator.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitated export to text, html or
            latex
        """
        model = self.model
        model_name = model.name + " - " + model.volatility.name

        # Header, left column: what was estimated and when
        top_left = [
            ("Dep. Variable:", self._dep_name),
            ("Mean Model:", model.name),
            ("Vol Model:", model.volatility.name),
            ("Distribution:", model.distribution.name),
            ("Method:", "Maximum Likelihood"),
            ("", ""),
            ("Date:", self._datetime.strftime("%a, %b %d %Y")),
            ("Time:", self._datetime.strftime("%H:%M:%S")),
        ]

        # Header, right column: fit statistics
        top_right = [
            ("R-squared:", "%#8.3f" % self.rsquared),
            ("Adj. R-squared:", "%#8.3f" % self.rsquared_adj),
            ("Log-Likelihood:", "%#10.6g" % self.loglikelihood),
            ("AIC:", "%#10.6g" % self.aic),
            ("BIC:", "%#10.6g" % self.bic),
            ("No. Observations:", self._nobs),
            ("Df Residuals:", self.nobs - self.num_params),
            ("Df Model:", self.num_params),
        ]

        left_stubs = [label for label, _ in top_left]
        left_vals = [[value] for _, value in top_left]
        table = SimpleTable(
            left_vals,
            txt_fmt=fmt_2cols,
            title=model_name + " Model Results",
            stubs=left_stubs,
        )

        smry = Summary()

        # fmt_2cols is not copied: the assignment below changes the shared
        # format object that is defined outside of this method.
        fmt_2cols["data_fmts"][1] = "%18s"

        # Right-hand labels are indented and padded to a fixed width.
        right_stubs = ["%-21s" % ("  " + label) for label, _ in top_right]
        right_vals = [[value] for _, value in top_right]
        table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
        smry.tables.append(table)

        conf_int_str = [
            "[" + format_float_fixed(bounds[0], 7, 3) + "," + format_float_fixed(bounds[1], 7, 3) + "]"
            for bounds in np.asarray(self.conf_int())
        ]

        stubs = self._names
        header = ["coef", "std err", "t", "P>|t|", "95.0% Conf. Int."]
        columns = (self.params, self.std_err, self.tvalues, self.pvalues, conf_int_str)
        formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]

        # One row per parameter; only np.float64 cells are converted to
        # fixed-width text, everything else is passed through unchanged.
        param_table_data = []
        for pos in range(len(columns[0])):
            row = []
            for column, fmt_args in zip(columns, formats):
                cell = column[pos]
                if isinstance(cell, np.float64):
                    cell = format_float_fixed(cell, *fmt_args)
                row.append(cell)
            param_table_data.append(row)

        titles = ("Mean Model", "Volatility Model", "Distribution")
        counts = (
            self.model.num_params,
            self.model.volatility.num_params,
            self.model.distribution.num_params,
        )
        # The parameters are stored consecutively: mean, volatility,
        # distribution. Components without parameters get no table.
        start = 0
        for title, count in zip(titles, counts):
            if count != 0:
                stop = start + count
                table = SimpleTable(
                    param_table_data[start:stop],
                    stubs=stubs[start:stop],
                    txt_fmt=fmt_params,
                    headers=header,
                    title=title,
                )
                smry.tables.append(table)
                start = stop

        smry.add_extra_txt(("Covariance estimator: " + self.cov_type,))
        return smry
Ejemplo n.º 36
0
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None):
    '''generate top table(s)

    Builds the general-information table at the top of a summary: a left
    column of (label, value) rows and, when given, a right column that is
    attached to the left one with ``extend_right``.

    Parameters
    ----------
    results : results instance
        Must provide ``model``. ``nobs``, ``df_model``, ``df_resid`` and
        ``llf`` are only read when the matching default item is requested.
    title : str, optional
        Title of the table. If None, the class name of the model followed
        by ' Regression Results' is used.
    gleft : list of (label, value) tuples, optional
        Rows of the left column. A value of None is replaced by the default
        for that label. If ``gleft`` itself is None a default left column is
        used and ``gright`` is ignored.
    gright : list of (label, value) tuples, optional
        Rows of the right column, treated like ``gleft``.
    yname, xname : optional
        Passed on to ``_getnames``.

    Returns
    -------
    general_table : SimpleTable

    Raises
    ------
    KeyError
        If a label with value None has no entry in the default items.

    Notes
    -----
    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    #time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    #create dictionary with default
    #use lambdas because some values raise exception if they are not available
    #alternate spellings are commented out to force unique labels
    default_items = dict([
          ('Dependent Variable:', lambda: [yname]),
          ('Dep. Variable:', lambda: [yname]),
          ('Model:', lambda: [results.model.__class__.__name__]),
          #('Model type:', lambda: [results.model.__class__.__name__]),
          ('Date:', lambda: [date]),
          ('Time:', lambda: time_of_day),
          ('Number of Obs:', lambda: [results.nobs]),
          #('No. of Observations:', lambda: ["%#6d" % results.nobs]),
          ('No. Observations:', lambda: ["%#6d" % results.nobs]),
          #('Df model:', lambda: [results.df_model]),
          ('Df Model:', lambda: ["%#6d" % results.df_model]),
          #TODO: check when we have non-integer df
          ('Df Residuals:', lambda: ["%#6d" % results.df_resid]),
          #('Df resid:', lambda: [results.df_resid]),
          #('df resid:', lambda: [results.df_resid]), #check capitalization
          ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]) #doesn't exist for RLM - exception
          #('Method:', lambda: [???]), #no default for this
          ])

    if title is None:
        # a space separates the class name from the rest of the title
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        #default: General part of the summary table, Applicable to all? models
        #every label has to be a key of default_items, otherwise the
        #replacement of the None values below raises a KeyError
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]

        #the log-likelihood is only shown if the results provide it
        try:
            results.llf
        except Exception: #AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))

        gen_right = []
    else:
        #work on copies so that the lists of the caller are not changed
        #by the padding below
        gen_left = list(gleft)
        gen_right = [] if gright is None else list(gright)

    gen_title = title
    gen_header = None

    def _fill_defaults(items):
        #replace missing (None) values with default values
        filled = []
        for item, value in items:
            if value is None:
                value = default_items[item]()  #let KeyErrors raise exception
            filled.append((item, value))
        return filled

    gen_left = _fill_defaults(gen_left)
    if gen_right:
        gen_right = _fill_defaults(gen_right)

    #check
    missing_values = [k for k,v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    #pad both tables to equal number of rows
    if gen_right:
        if len(gen_right) < len(gen_left):
            #fill up with blank lines to same length
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            #fill up with blank lines to same length, just to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        #padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  '+k), v) for k,v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right) #transpose row col
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title = gen_title,
                                      txt_fmt = fmt_2cols #gen_fmt
                                      )
    else:
        gen_table_right = []  #because .extend_right seems works with []


    #moved below so that we can pad if needed to match length of gen_right
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title = gen_title,
                                 txt_fmt = fmt_2cols
                                 )

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table #, gen_table_left, gen_table_right
Ejemplo n.º 37
0
    def summary(self):
        """
        Constructs a summary of the results from a fit model.

        The parameters were supplied by the user, so the header shows "--"
        for both R-squared entries and the parameter tables contain only the
        coefficient column. The summary consists of the two-column header
        table, one parameter table for each of the mean model, the volatility
        model and the distribution that has at least one parameter, and two
        explanatory notes.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitated export to text, html or
            latex
        """
        model = self.model
        model_name = model.name + " - " + model.volatility.name

        # Header, left column: what the model is and when it was created
        top_left = [
            ("Dep. Variable:", self._dep_name),
            ("Mean Model:", model.name),
            ("Vol Model:", model.volatility.name),
            ("Distribution:", model.distribution.name),
            ("Method:", "User-specified Parameters"),
            ("", ""),
            ("Date:", self._datetime.strftime("%a, %b %d %Y")),
            ("Time:", self._datetime.strftime("%H:%M:%S")),
        ]

        # Header, right column; the two blank rows keep both columns at the
        # same length.
        top_right = [
            ("R-squared:", "--"),
            ("Adj. R-squared:", "--"),
            ("Log-Likelihood:", "%#10.6g" % self.loglikelihood),
            ("AIC:", "%#10.6g" % self.aic),
            ("BIC:", "%#10.6g" % self.bic),
            ("No. Observations:", self._nobs),
            ("", ""),
            ("", ""),
        ]

        left_stubs = [label for label, _ in top_left]
        left_vals = [[value] for _, value in top_left]
        table = SimpleTable(
            left_vals,
            txt_fmt=fmt_2cols,
            title=model_name + " Model Results",
            stubs=left_stubs,
        )

        smry = Summary()

        # fmt_2cols is not copied: the assignment below changes the shared
        # format object that is defined outside of this method.
        fmt_2cols["data_fmts"][1] = "%18s"

        # Right-hand labels are indented and padded to a fixed width.
        right_stubs = ["%-21s" % ("  " + label) for label, _ in top_right]
        right_vals = [[value] for _, value in top_right]
        table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
        smry.tables.append(table)

        stubs = self._names
        header = ["coef"]
        columns = (self.params,)
        formats = [(10, 4)]

        # One row per parameter; only np.float64 cells are converted to
        # fixed-width text, everything else is passed through unchanged.
        param_table_data = []
        for pos in range(len(columns[0])):
            row = []
            for column, fmt_args in zip(columns, formats):
                cell = column[pos]
                if isinstance(cell, np.float64):
                    cell = format_float_fixed(cell, *fmt_args)
                row.append(cell)
            param_table_data.append(row)

        titles = ("Mean Model", "Volatility Model", "Distribution")
        counts = (
            self.model.num_params,
            self.model.volatility.num_params,
            self.model.distribution.num_params,
        )
        # The parameters are stored consecutively: mean, volatility,
        # distribution. Components without parameters get no table.
        start = 0
        for title, count in zip(titles, counts):
            if count != 0:
                stop = start + count
                table = SimpleTable(
                    param_table_data[start:stop],
                    stubs=stubs[start:stop],
                    txt_fmt=fmt_params,
                    headers=header,
                    title=title,
                )
                smry.tables.append(table)
                start = stop

        smry.add_extra_txt(
            (
                "Results generated with user-specified parameters.",
                "Since the model was not estimated, there are no std. errors.",
            )
        )
        return smry
Ejemplo n.º 38
0
Archivo: base.py Proyecto: q1ang/arch
    def summary(self):
        """
        Constructs a summary of the results from a fit model.

        The parameters were supplied by the user, so the header shows '--'
        for both R-squared entries and the parameter tables contain only the
        coefficient column. The summary consists of the two-column header
        table, one parameter table for each of the mean model, the volatility
        model and the distribution that has at least one parameter, and two
        explanatory notes.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitated export to text, html or
            latex
        """
        model = self.model
        model_name = model.name + ' - ' + model.volatility.name

        # Header, left column: what the model is and when it was created
        top_left = [
            ('Dep. Variable:', self._dep_name),
            ('Mean Model:', model.name),
            ('Vol Model:', model.volatility.name),
            ('Distribution:', model.distribution.name),
            ('Method:', 'User-specified Parameters'),
            ('', ''),
            ('Date:', self._datetime.strftime('%a, %b %d %Y')),
            ('Time:', self._datetime.strftime('%H:%M:%S')),
        ]

        # Header, right column; the two blank rows keep both columns at the
        # same length.
        top_right = [
            ('R-squared:', '--'),
            ('Adj. R-squared:', '--'),
            ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
            ('AIC:', '%#10.6g' % self.aic),
            ('BIC:', '%#10.6g' % self.bic),
            ('No. Observations:', self._nobs),
            ('', ''),
            ('', ''),
        ]

        left_stubs = [label for label, _ in top_left]
        left_vals = [[value] for _, value in top_left]
        table = SimpleTable(left_vals,
                            txt_fmt=fmt_2cols,
                            title=model_name + ' Model Results',
                            stubs=left_stubs)

        smry = Summary()

        # fmt_2cols is not copied: the assignment below changes the shared
        # format object that is defined outside of this method.
        fmt_2cols['data_fmts'][1] = '%18s'

        # Right-hand labels are indented and padded to a fixed width.
        right_stubs = ['%-21s' % ('  ' + label) for label, _ in top_right]
        right_vals = [[value] for _, value in top_right]
        table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
        smry.tables.append(table)

        stubs = self._names
        header = ['coef']
        columns = (self.params,)
        formats = [(10, 4)]

        # One row per parameter; only np.float64 cells are converted to
        # fixed-width text, everything else is passed through unchanged.
        param_table_data = []
        for pos in range(len(columns[0])):
            row = []
            for column, fmt_args in zip(columns, formats):
                cell = column[pos]
                if isinstance(cell, np.float64):
                    cell = format_float_fixed(cell, *fmt_args)
                row.append(cell)
            param_table_data.append(row)

        titles = ('Mean Model', 'Volatility Model', 'Distribution')
        counts = (self.model.num_params,
                  self.model.volatility.num_params,
                  self.model.distribution.num_params)
        # The parameters are stored consecutively: mean, volatility,
        # distribution. Components without parameters get no table.
        start = 0
        for title, count in zip(titles, counts):
            if count != 0:
                stop = start + count
                table = SimpleTable(param_table_data[start:stop],
                                    stubs=stubs[start:stop],
                                    txt_fmt=fmt_params,
                                    headers=header, title=title)
                smry.tables.append(table)
                start = stop

        notes = ('Results generated with user-specified parameters.',
                 'Since the model was not estimated, there are no std. errors.')
        smry.add_extra_txt(notes)
        return smry
Ejemplo n.º 39
0
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Summarize the fit of a linear model in a general and a parameter table.

    Parameters
    ----------
    yname : string
            optional, Default is ``self.model.endog_names`` or `y` if the
            model does not have that attribute
    xname : list of strings
            optional, Default is ``self.model.exog_names`` or `var_#` for
            # in p the number of regressors
    title : string
            optional, Default (0) looks up a description by the class name
            of the model, e.g. 'Generalized linear model'
    alpha : float
            significance level passed to ``self.conf_int``; the column
            header shows (1 - alpha) * 100 percent
    returns : string
              'print', 'text', 'table', 'csv', 'latex', 'html';
              only 'print' is implemented, see Returns
    model_info : optional
              not used

    Returns
    -------
    returns='print'
            the general table and the parameter table as one string

    Any other value, including the default 'text', falls through and
    returns None: 'text', 'table', 'csv', 'latex' and 'html' are not
    implemented yet.
    NOTE(review): the signature default is 'text', so a call without
    ``returns`` currently returns None - confirm whether 'print' was meant
    to be the default.

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    import time



    #TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS' : 'Ordinary least squares',
                   'GLS' : 'Generalized least squares',
                   'GLSAR' : 'Generalized least squares with AR(p)',
                   'WLS' : 'Weigthed least squares',
                   'RLM' : 'Robust linear model',
                   'GLM' : 'Generalized linear model'
                   }
    model_methods = {'OLS' : 'Least Squares',
                   'GLS' : 'Least Squares',
                   'GLSAR' : 'Least Squares',
                   'WLS' : 'Least Squares',
                   'RLM' : '?',
                   'GLM' : '?'
                   }
    if title==0:
        title = model_types[self.model.__class__.__name__]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    #dist_family = self.model.family.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    #replace definition of stubs data by single list
    #e.g.
    gen_left =   [('Model type:', [modeltype]),
                  ('Date:', [date]),
                  ('Dependent Variable:', yname), #What happens with multiple names?
                  ('df model', [df_model])
                  ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_title = title
    gen_header = None
##    gen_stubs_left = ('Model type:',
##                      'Date:',
##                      'Dependent Variable:',
##                      'df model'
##                  )
##    gen_data_left = [[modeltype],
##                     [date],
##                     yname, #What happens with multiple names?
##                     [df_model]
##                     ]
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title = gen_title,
                                 txt_fmt = gen_fmt
                                 )

    gen_stubs_right = ('Method:',
                      'Time:',
                      'Number of Obs:',
                      'df resid'
                      )
    gen_data_right = ([modeltype], #was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid]
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                 gen_header,
                                 gen_stubs_right,
                                 title = gen_title,
                                 txt_fmt = gen_fmt
                                 )
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    #Note: this is not necessary since we standardized names, only t versus normal
    tstats = {'OLS' : self.t(),
            'GLS' : self.t(),
            'GLSAR' : self.t(),
            'WLS' : self.t(),
            'RLM' : self.t(),
            'GLM' : self.t()
            }
    prob_stats = {'OLS' : self.pvalues,
                 'GLS' : self.pvalues,
                 'GLSAR' : self.pvalues,
                 'WLS' : self.pvalues,
                 'RLM' : self.pvalues,
                 'GLM' : self.pvalues
                }
    #Dictionary to store the header names for the parameter part of the
    #summary table. look up by modeltype
    alp = str((1-alpha)*100)+'%'
    param_header = {
         'OLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'WLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLM'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution
         'RLM'   : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']  #checke z
                   }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    #range instead of xrange: xrange does not exist on Python 3, and the
    #indices are iterated several times below
    exog_len = range(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # Simpletable should be able to handle the formating
    #materialized as a list: on Python 3 zip returns a one-shot iterator
    params_data = list(zip(["%#6.4g" % (params[i]) for i in exog_len],
                       ["%#6.4f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in \
                                                             exog_len]
                      ))
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title = None,
                                  txt_fmt = fmt_2, #gen_fmt,
                                  )

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics


    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model

    def ols_printer():
        """
        print summary table for ols models
        """
        table = str(general_table)+'\n'+str(parameter_table)
        return table

    def ols_to_csv():
        """
        exports ols summary data to csv
        """
        pass
    def glm_printer():
        table = str(general_table)+'\n'+str(parameter_table)
        return table
        pass

    printers  = {'OLS': ols_printer,
                'GLM' : glm_printer
                }

    if returns=='print':
        #model types without their own printer use the OLS layout
        try:
            return printers[modeltype]()
        except KeyError:
            return printers['OLS']()
Ejemplo n.º 40
0
Archivo: base.py Proyecto: q1ang/arch
    def summary(self):
        """
        Constructs a summary of the results from a fit model.

        The summary starts with a two-column header table (model description
        on the left, fit statistics on the right). It is followed by one
        parameter table for each of the mean model, the volatility model and
        the distribution that has at least one parameter. The trailing notes
        name the covariance estimator and, if ``convergence_flag`` is set,
        carry a warning with the last entry of the optimizer output.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitated export to text, html or
            latex
        """
        model = self.model
        model_name = model.name + ' - ' + model.volatility.name

        # Header, left column: what was estimated and when
        top_left = [
            ('Dep. Variable:', self._dep_name),
            ('Mean Model:', model.name),
            ('Vol Model:', model.volatility.name),
            ('Distribution:', model.distribution.name),
            ('Method:', 'Maximum Likelihood'),
            ('', ''),
            ('Date:', self._datetime.strftime('%a, %b %d %Y')),
            ('Time:', self._datetime.strftime('%H:%M:%S')),
        ]

        # Header, right column: fit statistics
        top_right = [
            ('R-squared:', '%#8.3f' % self.rsquared),
            ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj),
            ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
            ('AIC:', '%#10.6g' % self.aic),
            ('BIC:', '%#10.6g' % self.bic),
            ('No. Observations:', self._nobs),
            ('Df Residuals:', self.nobs - self.num_params),
            ('Df Model:', self.num_params),
        ]

        left_stubs = [label for label, _ in top_left]
        left_vals = [[value] for _, value in top_left]
        table = SimpleTable(left_vals,
                            txt_fmt=fmt_2cols,
                            title=model_name + ' Model Results',
                            stubs=left_stubs)

        smry = Summary()

        # fmt_2cols is not copied: the assignment below changes the shared
        # format object that is defined outside of this method.
        fmt_2cols['data_fmts'][1] = '%18s'

        # Right-hand labels are indented and padded to a fixed width.
        right_stubs = ['%-21s' % ('  ' + label) for label, _ in top_right]
        right_vals = [[value] for _, value in top_right]
        table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
        smry.tables.append(table)

        conf_int_str = [
            '[' + format_float_fixed(bounds[0], 7, 3)
            + ',' + format_float_fixed(bounds[1], 7, 3) + ']'
            for bounds in np.asarray(self.conf_int())
        ]

        stubs = self._names
        header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.']
        columns = (self.params,
                   self.std_err,
                   self.tvalues,
                   self.pvalues,
                   conf_int_str)
        formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]

        # One row per parameter; only np.float64 cells are converted to
        # fixed-width text, everything else is passed through unchanged.
        param_table_data = []
        for pos in range(len(columns[0])):
            row = []
            for column, fmt_args in zip(columns, formats):
                cell = column[pos]
                if isinstance(cell, np.float64):
                    cell = format_float_fixed(cell, *fmt_args)
                row.append(cell)
            param_table_data.append(row)

        titles = ('Mean Model', 'Volatility Model', 'Distribution')
        counts = (self.model.num_params,
                  self.model.volatility.num_params,
                  self.model.distribution.num_params)
        # The parameters are stored consecutively: mean, volatility,
        # distribution. Components without parameters get no table.
        start = 0
        for title, count in zip(titles, counts):
            if count != 0:
                stop = start + count
                table = SimpleTable(param_table_data[start:stop],
                                    stubs=stubs[start:stop],
                                    txt_fmt=fmt_params,
                                    headers=header, title=title)
                smry.tables.append(table)
                start = stop

        extra_text = ['Covariance estimator: ' + self.cov_type]

        if self.convergence_flag:
            # The message starts with a line break and spans two lines.
            warning = (
                "\nWARNING: The optimizer did not indicate sucessful "
                "convergence. The message was\n"
                "{string_message}. See convergence_flag."
            )
            extra_text.append(
                warning.format(string_message=self._optim_output[-1]))

        smry.add_extra_txt(extra_text)
        return smry