예제 #1
0
    def test_SimpleTable_3(self):
        """ Test SimpleTable.extend() as in extend down"""
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        self.assertEqual(desired, str(actual))
예제 #2
0
    def test_SimpleTable_1(self):
        """Basic test, test_SimpleTable_1"""
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999],[1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data, test1header, test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        self.assertEqual(desired, str(actual))
예제 #3
0
    def summary_quantiles(self,
                          idx,
                          distppf,
                          frac=None,
                          varnames=None,
                          title=None):
        '''summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            percent point function (inverse cdf) of the reference
            distribution. It is called with the first value returned by
            ``self.quantiles`` reshaped to a column.
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : None or array_like, float
            probabilities that are passed on to ``self.quantiles``. If None,
            ``[0.01, 0.025, 0.05, 0.1, 0.975]`` is used.
        varnames : None, or list of strings
            optional list of variable names, same length as idx
        title : None or string
            optional prefix that is put in front of the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results

        '''
        if frac is None:
            # Fresh list per call, so the default can never be shared or
            # mutated between calls.
            frac = [0.01, 0.025, 0.05, 0.1, 0.975]
        idx = np.atleast_1d(idx)  # assure iterable

        quant, mcq = self.quantiles(idx, frac=frac)
        # not sure whether this will work with single quantile
        crit = distppf(np.atleast_2d(quant).T)

        # Interleave the Monte Carlo quantiles and the reference quantiles so
        # that each variable contributes an ('mc', 'dist') column pair.
        mml = []
        for i in range(len(idx)):
            mml.extend([mcq[:, i], crit[:, i]])
        mmlar = np.column_stack([quant] + mml)

        if title:
            title = title + ' Quantiles (critical values)'
        else:
            title = 'Quantiles (critical values)'
        # TODO use stub instead
        if varnames is None:
            # two data columns per variable, plus the leading 'prob' column
            varnames = ['var%d' % i for i in range(mmlar.shape[1] // 2)]
        headers = ['\nprob'] + [
            '%s\n%s' % (i, t) for i in varnames for t in ['mc', 'dist']
        ]
        data_fmts = ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
        return SimpleTable(mmlar,
                           txt_fmt={'data_fmts': data_fmts},
                           title=title,
                           headers=headers)
예제 #4
0
    def test_SimpleTable_1(self):
        """Basic test, test_SimpleTable_1"""
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999], [1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data,
                             test1header,
                             test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        self.assertEqual(desired, str(actual))
예제 #5
0
    def test_customlabel(self):
        """Limited test of custom custom labeling"""
        if has_numpy:
            tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1)
            tbl[1][1].data = np.nan
            tbl.label_cells(custom_labeller)
            #print([[c.datatype for c in row] for row in tbl])
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    --   *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = '\n%s\n' % tbl.as_text(missing='--')
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)
예제 #6
0
    def print_summary(self, stats):
        """Build the 'Summary Statistics' table for the requested statistics.

        Parameters
        ----------
        stats : sequence of keys into ``self.univariate``
            Used both as the column headers and to look up the values.

        Returns
        -------
        table : SimpleTable
            One row per entry of ``self.univariate['obs'][2]``, one column
            per statistic; row labels come from ``self.univariate['obs'][1]``.
        """
        # TODO: need to specify a table formatting for the numbers, using default
        row_labels = self.univariate['obs'][1]
        n_rows = len(self.univariate['obs'][2])
        rows = []
        for col in range(n_rows):
            rows.append([self.univariate[name][2][col] for name in stats])
        return SimpleTable(rows, stats, row_labels,
                           title='Summary Statistics')
예제 #7
0
    def test_SimpleTable_3(self):
        """ Test SimpleTable.extend() as in extend down"""
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        self.assertEqual(desired, str(actual))
예제 #8
0
    def summary_cdf(self, idx, frac, crit, varnames=None, title=None):
        '''summary table for cumulative distribution function


        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            probabilities shown in the first (``prob``) column of the table
        crit : array_like
            values for which cdf is calculated; indexed as ``crit[:, i]``, so
            column ``i`` holds the values for ``idx[i]``
        varnames : None, or list of strings
            optional list of variable names, same length as idx
        title : None or string
            optional prefix that is put in front of the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results


        '''
        idx = np.atleast_1d(idx)  # assure iterable

        # TODO: need broadcasting in cdf
        # One cdf call per variable; the second element of the result is
        # flattened into a single table column.
        mml = [self.cdf(crit[:, i], [ix])[1].ravel()
               for i, ix in enumerate(idx)]
        mmlar = np.column_stack([frac] + mml)

        # Same spelling in both branches (the prefixed variant used to read
        # ' Probabilites').
        if title:
            title = title + ' Probabilities'
        else:
            title = 'Probabilities'
        # TODO use stub instead

        if varnames is None:
            varnames = ['var%d' % i for i in range(mmlar.shape[1] - 1)]
        # list() so that a tuple of names can be concatenated as well
        headers = ['prob'] + list(varnames)
        # One format per table column: the probability column followed by one
        # column per variable. The count must follow the number of columns of
        # the stacked array, not the number of rows.
        data_fmts = ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
        return SimpleTable(mmlar,
                           txt_fmt={'data_fmts': data_fmts},
                           title=title,
                           headers=headers)
예제 #9
0
 def summary_proc(self, g):
     """
     For internal use

     Print a SimpleTable with the columns 'Time', 'Survival' and 'Std. Err'
     for entry ``g``: ``self.ts[g]`` is put in front of the transposed
     ``self.results[g]``. The title names the group when ``self.exog`` is
     set. Nothing is returned.
     """
     # Identity test: `!= None` compares elementwise when exog is an array
     # and then has no unambiguous truth value.
     if self.exog is not None:
         myTitle = ('exog = ' + str(self.groups[g]) + '\n')
     else:
         myTitle = "Kaplan-Meier Curve"
     table = np.transpose(self.results[g])
     table = np.c_[np.transpose(self.ts[g]), table]
     table = SimpleTable(table, headers=['Time', 'Survival', 'Std. Err'],
                         title=myTitle)
     print(table)
예제 #10
0
    def summary_find_nfact(self):
        '''summarize the selection of the number of factors

        Runs ``fit_find_nfact`` first when no ``results_find_nfact`` attribute
        is present yet.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors; every
            line is preceded by a newline

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()
        results = self.results_find_nfact

        lines = [
            'Best result for k, by AIC, BIC, R2_adj, L1O',
            ' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact),
        ]

        from scikits.statsmodels.iolib.table import (SimpleTable,
                                                     default_txt_fmt,
                                                     default_latex_fmt,
                                                     default_html_fmt)

        column_names = ['k', 'AIC', 'BIC', 'R2_adj', 'L1O']
        # integer format for k, three decimals for the four criteria
        table_fmt = dict(data_fmts=['%6d'] + ['%10.3f'] * 4)
        table = SimpleTable(results, column_names, None, txt_fmt=table_fmt)

        lines.extend([
            "PCA regression on simulated data,",
            "DGP: 2 factors and 4 explanatory variables",
            str(table),
            "Notes: k is number of components of PCA,",
            "       constant is added additionally",
            "       k=0 means regression on constant only",
            "       L1O: sum of squared prediction errors for leave-one-out",
        ])
        return ''.join('\n' + line for line in lines)
예제 #11
0
    def summary_find_nfact(self):
        '''summarize the selection of the number of factors

        ``fit_find_nfact`` is run first if the instance has no
        ``results_find_nfact`` attribute yet.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors; the
            string starts with a newline

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()

        results = self.results_find_nfact
        # The leading empty entry gives the newline in front of the first line
        # when the pieces are joined.
        pieces = ['', 'Best result for k, by AIC, BIC, R2_adj, L1O']
        pieces.append(' '*19 + '%5d %4d %6d %5d' % tuple(self.best_nfact))

        from scikits.statsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                default_latex_fmt, default_html_fmt)

        labels = ['k', 'AIC', 'BIC', 'R2_adj', 'L1O']
        # integer format for k, three decimals for the four criteria
        cell_formats = ['%6d'] + ['%10.3f', '%10.3f', '%10.3f', '%10.3f']
        table = SimpleTable(results, labels, None,
                            txt_fmt={'data_fmts': cell_formats})

        pieces.append("PCA regression on simulated data,")
        pieces.append("DGP: 2 factors and 4 explanatory variables")
        pieces.append(str(table))
        pieces.append("Notes: k is number of components of PCA,")
        pieces.append("       constant is added additionally")
        pieces.append("       k=0 means regression on constant only")
        pieces.append("       L1O: sum of squared prediction errors for leave-one-out")
        return '\n'.join(pieces)
예제 #12
0
def summary_params_2d(result,
                      extras=None,
                      endog_names=None,
                      exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list of strings
        additional attributes to add below a parameter row, e.g. bse or tvalues
    endog_names : None or list of strings
        names used as row labels (stubs) of the table
    exog_names : None or list of strings
        names used as column headers of the table
    title : None or string
        title of the table

    Returns
    -------
    table : SimpleTable
        one row per row of ``result.params``; when `extras` is given, each
        parameter row is followed by one parenthesised row per extra

    '''
    import copy

    if endog_names is None:
        #TODO: note the [1:] is specific to current MNLogit
        endog_names = []
        for i in np.unique(result.model.endog)[1:]:
            endog_names.append('endog_%d' % i)
    if exog_names is None:
        # NOTE(review): the default count follows len(result.params) (rows)
        # while data_fmts below follows shape[1] (columns) - confirm intended.
        exog_names = ['var%d' % i for i in range(len(result.params))]

    #TODO: check formatting options with different values
    res_params = []
    for row in result.params:
        res_params.append([forg(item, prec=4) for item in row])

    if not extras:  #None or empty
        data = res_params
        stubs = endog_names
    else:
        # Format every extra attribute as '(value)' strings, one list of rows
        # per attribute.
        extras_list = []
        for what in extras:
            formatted = []
            for col in getattr(result, what):
                formatted.append(
                    ['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                     for v in col])
            extras_list.append(formatted)

        # Interleave: parameter row, then the matching row of each extra.
        data = []
        for group in zip(res_params, *extras_list):
            data.extend(group)

        # Same interleaving for the labels; the extra rows get empty stubs.
        blanks = [[''] * len(endog_names)] * len(extras)
        stubs = []
        for group in zip(endog_names, *blanks):
            stubs.extend(group)

    # Values are already formatted strings, so every column uses '%s'.
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt['data_fmts'] = ["%s"] * result.params.shape[1]
    return SimpleTable(data,
                       headers=exog_names,
                       stubs=stubs,
                       title=title,
                       txt_fmt=txt_fmt)
예제 #13
0
    def test_SimpleTable_4(self):
        """Basic test, test_SimpleTable_4
        test uses custom txt_fmt

        Builds one SimpleTable with custom text, LaTeX and HTML formats and
        defines nested checks of its renderings.

        NOTE(review): the nested functions below are only defined, never
        called from this method, so none of their assertions run here.
        """
        # Custom plain-text format: '*' is used for all decoration lines and
        # ' * ' as the column separator.
        txt_fmt1 = dict(data_fmts=['%3.2f', '%d'],
                        empty_cell=' ',
                        colwidths=1,
                        colsep=' * ',
                        row_pre='* ',
                        row_post=' *',
                        table_dec_above='*',
                        table_dec_below='*',
                        header_dec_below='*',
                        header_fmt='%s',
                        stub_fmt='%s',
                        title_align='r',
                        header_align='r',
                        data_aligns="r",
                        stubs_align="l",
                        fmt='txt')
        # LaTeX and HTML start from copies of the defaults.
        ltx_fmt1 = default_latex_fmt.copy()
        html_fmt1 = default_html_fmt.copy()
        # 2x2 table data mixing float and int cells.
        cell0data = 0.0000
        cell1data = 1
        row0data = [cell0data, cell1data]
        row1data = [2, 3.333]
        table1data = [row0data, row1data]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        tbl = SimpleTable(table1data,
                          test1header,
                          test1stubs,
                          txt_fmt=txt_fmt1,
                          ltx_fmt=ltx_fmt1,
                          html_fmt=html_fmt1)

        def test_txt_fmt1(self):
            """Limited test of custom txt_fmt"""
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    0.00 *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = '\n%s\n' % tbl.as_text()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

            # NOTE(review): this function is nested inside test_txt_fmt1 and
            # never called; its `desired` is local to it.
            def test_ltx_fmt1(self):
                """Limited test of custom ltx_fmt"""
                desired = r"""
\begin{tabular}{lcc}
\toprule
                        & \textbf{header1} & \textbf{header2}  \\
\midrule
\textbf{stub1} &       0.0        &        1          \\
\textbf{stub2} &        2         &      3.333        \\
\bottomrule
\end{tabular}
"""

            # NOTE(review): these lines belong to test_txt_fmt1, so the LaTeX
            # rendering is compared against the plain-text `desired` above -
            # the indentation looks mangled; confirm the intended nesting.
            actual = '\n%s\n' % tbl.as_latex_tabular()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

        def test_html_fmt1(self):
            """Limited test of custom html_fmt"""
            desired = """
<table class="simpletable">
<tr>
    <td></td>    <th>header1</th> <th>header2</th>
</tr>
<tr>
  <th>stub1</th>   <td>0.0</td>      <td>1</td>
</tr>
<tr>
  <th>stub2</th>    <td>2</td>     <td>3.333</td>
</tr>
</table>
"""
            actual = '\n%s\n' % tbl.as_html()
            print(actual)
            print(desired)
            self.assertEqual(actual, desired)
예제 #14
0
    stub_fmt = '%s',
    title_align='r',
    header_align = 'r',
    data_aligns = "r",
    stubs_align = "l",
    fmt = 'txt'
)
# Module-level fixture: a 2x2 table mixing float and int cells, with row
# labels (stubs) and column headers.
cell0data = 0.0000
cell1data = 1
row0data = [cell0data, cell1data]
row1data = [2, 3.333]
table1data = [ row0data, row1data ]
test1stubs = ('stub1', 'stub2')
test1header = ('header1', 'header2')
#test1header = ('header1\nheader1a', 'header2\nheader2a')
# Table built from the fixture data, with one format dict per output type
# (the format dicts are defined above this point).
tbl = SimpleTable(table1data, test1header, test1stubs,
    txt_fmt=txt_fmt1, ltx_fmt=ltx_fmt1, html_fmt=html_fmt1)


def custom_labeller(cell):
    """Return the label 'missing' for cells whose data is NaN, else None.

    Parameters
    ----------
    cell : object with a ``data`` attribute
        the cell to inspect

    Returns
    -------
    label : 'missing' or None
        'missing' if ``cell.data`` is a float NaN, None otherwise
    """
    data = cell.data
    # An identity test against np.nan only recognises that one object; NaN
    # values from float('nan') or from computations are different objects.
    # NaN is the only float that compares unequal to itself.
    if isinstance(data, (float, np.floating)) and data != data:
        return 'missing'
    return None



class test_Cell(unittest.TestCase):
    def test_celldata(self):
        """Each Cell must expose the datum it was constructed with."""
        celldata = (cell0data, cell1data, row1data[0], row1data[1])
        # datatype alternates 0, 1, 0, 1 with the position of the datum
        cells = []
        for position, datum in enumerate(celldata):
            cells.append(Cell(datum, datatype=position % 2))
        for cell, datum in zip(cells, celldata):
            self.assertEqual(cell.data, datum)
    def summary_old(self, yname=None, xname=None, title='Generalized linear model',
                returns='text'):
        """
        Summarize the Generalized linear model results.

        Parameters
        -----------
        yname : string
                optional, Default is `Y`
        xname : list of strings
                optional, Default is `x#` for # in range(number of columns
                of the model's exog)
        title : string
                optional, Default is 'Generalized linear model'
        returns : string
                  'text', 'print', 'tables' (or 'table'), 'csv', 'latex',
                  'html'

        Returns
        -------
        returns='text' (default)
                string with the summary tables

        returns='print'
                prints the summary tables, returns None

        returns='tables' or 'table'
                list of SimpleTable instances
                [top left table, top right table, parameter table]

        returns='csv'
                string of csv of the results, to import into a spreadsheet

        returns='latex', returns='html'
                not implemented yet, prints a notice and returns None

        Any other value returns None.

        Examples (needs updating)
        --------
        >>> import scikits.statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> ols_results = sm.OLS(data.endog, data.exog).results
        >>> print(ols_results.summary())
        ...

        Notes
        -----
        The 'stand errors' column shows ``self.bse``.
        The confidence interval column shows ``self.conf_int()``.
        """
        import time
        from scikits.statsmodels.iolib.table import SimpleTable

        # Only fall back to the defaults when the caller gave no names.
        if yname is None:
            yname = 'Y'
        if xname is None:
            xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

        # Results used in the summary
        now = time.localtime()
        dist_family = self.model.family.__class__.__name__
        params = self.params
        #TODO: stand_errors = self.stand_errors
        stand_errors = self.bse
        conf_int = self.conf_int()
        tvalues = self.tvalues

        table_1l_fmt = dict(
            data_fmts = ["%s", "%s", "%s", "%s", "%s"],
            empty_cell = '',
            colwidths = 15,
            colsep='   ',
            row_pre = '  ',
            row_post = '  ',
            table_dec_above='=',
            table_dec_below='',
            header_dec_below=None,
            header_fmt = '%s',
            stub_fmt = '%s',
            title_align='c',
            header_align = 'r',
            data_aligns = "r",
            stubs_align = "l",
            fmt = 'txt'
            )
        # Note table_1l_fmt over rides the below formating. in extend_right? JP
        table_1r_fmt = dict(
            data_fmts = ["%s", "%s", "%s", "%s", "%1s"],
            empty_cell = '',
            colwidths = 12,
            colsep='   ',
            row_pre = '',
            row_post = '',
            table_dec_above='=',
            table_dec_below='',
            header_dec_below=None,
            header_fmt = '%s',
            stub_fmt = '%s',
            title_align='c',
            header_align = 'r',
            data_aligns = "r",
            stubs_align = "l",
            fmt = 'txt'
            )

        table_2_fmt = dict(
            data_fmts = ["%s", "%s", "%s", "%s"],
            empty_cell = '',
            colwidths = 13,
            colsep=' ',
            row_pre = '  ',
            row_post = '   ',
            table_dec_above='=',
            table_dec_below='=',
            header_dec_below='-',
            header_fmt = '%s',
            stub_fmt = '%s',
            title_align='c',
            header_align = 'r',
            data_aligns = 'r',
            stubs_align = 'l',
            fmt = 'txt'
        )

        ########  summary table 1   #######
        table_1l_stubs = ('Model Family:',
                          'Method:',
                          'Dependent Variable:',
                          'Date:',
                          'Time:',
                          )
        table_1l_data = [
                         [dist_family],
                         ['IRLS'],
                         [yname],
                         [time.strftime("%a, %d %b %Y", now)],
                         [time.strftime("%H:%M:%S", now)],
                        ]
        table_1l = SimpleTable(table_1l_data,
                            None,
                            table_1l_stubs,
                            title=title,
                            txt_fmt = table_1l_fmt)

        table_1r_stubs = ('# of obs:',
                          'Df residuals:',
                          'Df model:',
                          'Scale:',
                          'Log likelihood:'
                          )
        table_1r_data = [
                         [self.nobs],
                         [self.df_resid],
                         [self.df_model],
                         ["%#6.4f" % (self.scale,)],
                         ["%#6.4f" % (self.llf,)]
                        ]
        table_1r = SimpleTable(table_1r_data,
                            None,
                            table_1r_stubs,
                            title=None,
                            txt_fmt = table_1r_fmt)

        ########  summary table 2   #######
        #TODO add % range to confidence interval column header
        table_2header = ('coefficient', 'stand errors', 't-statistic',
        'Conf. Interval')
        # A real list of row tuples (not a zip object), so the rows can be
        # traversed more than once.
        table_2data = [("%#6.4f" % (params[i]),
                        "%#6.4f" % stand_errors[i],
                        "%#6.4f" % (tvalues[i]),
                        """ [%#6.3f, %#6.3f]""" % tuple(conf_int[i]))
                       for i in range(len(xname))]
        table_2 = SimpleTable(table_2data,
                            table_2header,
                            xname,
                            title=None,
                            txt_fmt = table_2_fmt)

        ########  Return Summary Tables ########
        if returns in ('table', 'tables'):
            # The docstring historically advertised 'table' while the code
            # checked 'tables'; both spellings are accepted.
            return [table_1l, table_1r, table_2]
        elif returns in ('text', 'print', 'csv'):
            # join the two top tables side by side, then add the parameters
            table_1l.extend_right(table_1r)
            if returns == 'csv':
                return table_1l.as_csv() + '\n' + table_2.as_csv()
            text = str(table_1l) + '\n' + str(table_2)
            if returns == 'text':
                return text
            print(text)
        elif returns in ('latex', 'html'):
            print('not available yet')
예제 #16
0
    for inidx, outidx in LeaveOneOut(len(y0)):
        resl1o = sm.OLS(y0[inidx], fact_wconst[inidx, :]).fit()
        #print data.endog[outidx], res.model.predict(data.exog[outidx,:]),
        prederr2 += (y0[outidx] -
                     resl1o.model.predict(fact_wconst[outidx, :]))**2.
    results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2])

# Collect the per-k results into a 2-D array with the columns
# k, AIC, BIC, R2_adj, L1O (see `headers` below).
results = np.array(results)
# print() with a single argument gives the same output on Python 2 and 3.
print(results)
print('best result for k, by AIC, BIC, R2_adj, L1O')
# Row index of the best value per criterion: argmin over columns 1:3 and the
# last column, argmax over column 3.
print(np.r_[(np.argmin(results[:, 1:3], 0),
             np.argmax(results[:, 3], 0),
             np.argmin(results[:, -1], 0))])

from scikits.statsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                             default_latex_fmt,
                                             default_html_fmt)

headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
numformat = ['%6d'] + ['%10.3f'] * 4  #'%10.4f'
txt_fmt1 = dict(data_fmts=numformat)
tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1)

print("PCA regression on simulated data,")
print("DGP: 2 factors and 4 explanatory variables")
print(tabl)
print("Notes: k is number of components of PCA,")
print("       constant is added additionally")
print("       k=0 means regression on constant only")
print("       L1O: sum of squared prediction errors for leave-one-out")
예제 #17
0
    def summary(self, yname=None, xname=None, returns='text'):
        """summarize the regression results as text, tables or csv

        Parameters
        -----------
        yname : string, optional
            Default is ``self.model.endog_names``
        xname : list of strings, optional
            Default is ``self.model.exog_names``
        returns : string
            'text' (default) returns one string, 'tables' returns the list
            of the three SimpleTable parts, 'csv' returns a csv string,
            'latex' and 'html' only print a notice and return None

        Returns
        -------
        String summarizing the fit of a linear model (for the default
        ``returns='text'``).

        Examples
        --------
        >>> import scikits.statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> ols_results = sm.OLS(data.endog, data.exog).results
        >>> print ols_results.summary()
        ...

        Notes
        -----
        All residual statistics are calculated on whitened residuals.
        The parameter table is also stored on the instance as
        ``self.summary2``.
        """
        import time
        from scikits.statsmodels.iolib.table import SimpleTable
        from scikits.statsmodels.stats.stattools import (jarque_bera,
                omni_normtest, durbin_watson)

        if yname is None:
            yname = self.model.endog_names
        if xname is None:
            xname = self.model.exog_names
        model_name = self.model.__class__.__name__

        llf, aic, bic = self.llf, self.aic, self.bic
        JB, JBpv, skew, kurtosis = jarque_bera(self.wresid)
        omni, omnipv = omni_normtest(self.wresid)

        now = time.localtime()

        part1_fmt = {
            'data_fmts': ["%s"],
            'empty_cell': '',
            'colwidths': 15,
            'colsep': ' ',
            'row_pre': '| ',
            'row_post': '|',
            'table_dec_above': '=',
            'table_dec_below': '',
            'header_dec_below': None,
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': "r",
            'stubs_align': "l",
            'fmt': 'txt',
        }
        part2_fmt = {
            'data_fmts': ["%#10.4g","%#10.4g","%#6.4f","%#6.4f"],
            'empty_cell': '',
            'colwidths': 14,
            'colsep': ' ',
            'row_pre': '| ',
            'row_post': ' |',
            'table_dec_above': '=',
            'table_dec_below': '=',
            'header_dec_below': '-',
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': 'r',
            'stubs_align': 'l',
            'fmt': 'txt',
        }
        part3_fmt = {
            'data_fmts': ["%#10.4g","%#10.4g","%#10.4g","%#6.4g"],
            'empty_cell': '',
            'colwidths': 15,
            'colsep': '   ',
            'row_pre': '| ',
            'row_post': '  |',
            'table_dec_above': None,
            'table_dec_below': '-',
            'header_dec_below': '-',
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': 'r',
            'stubs_align': 'l',
            'fmt': 'txt',
        }

        ########  summary Part 1: general information  #######
        top_rows = [
            ('Dependent Variable:', yname),
            ('Model:', model_name),
            ('Method:', 'Least Squares'),
            ('Date:', time.strftime("%a, %d %b %Y", now)),
            ('Time:', time.strftime("%H:%M:%S", now)),
            ('# obs:', self.nobs),
            ('Df residuals:', self.df_resid),
            ('Df model:', self.df_model),
        ]
        part1 = SimpleTable([[value] for _, value in top_rows],
                            None,
                            tuple(label for label, _ in top_rows),
                            title='Summary of Regression Results',
                            txt_fmt=part1_fmt)

        ########  summary Part 2: parameter table  #######
        n_coef = len(xname)
        # One column per statistic, then transposed into one row per regressor.
        columns = [[getattr(self, attr)[i] for i in range(n_coef)]
                   for attr in ('params', 'bse', 'tvalues', 'pvalues')]
        part2 = SimpleTable(zip(*columns),
                            ('coefficient', 'std. error', 't-statistic',
                             'prob.'),
                            xname,
                            title=None,
                            txt_fmt=part2_fmt)

        self.summary2 = part2

        ########  summary Part 3: model and residual statistics  #######
        model_rows = [
            ('R-squared:', self.rsquared),
            ('Adjusted R-squared:', self.rsquared_adj),
            ('F-statistic:', self.fvalue),
            ('Prob (F-statistic):', self.f_pvalue),
            ('Log likelihood:', llf),
            ('AIC criterion:', aic),
            ('BIC criterion:', bic),
        ]
        resid_rows = [
            ('Durbin-Watson:', durbin_watson(self.wresid)),
            ('Omnibus:', omni),
            ('Prob(Omnibus):', omnipv),
            ('JB:', JB),
            ('Prob(JB):', JBpv),
            ('Skew:', skew),
            ('Kurtosis:', kurtosis),
        ]
        part3L = SimpleTable([[value] for _, value in model_rows],
                             ['Models stats'],
                             tuple(label for label, _ in model_rows),
                             txt_fmt=part3_fmt)
        part3R = SimpleTable([[value] for _, value in resid_rows],
                             ['Residual stats'],
                             tuple(label for label, _ in resid_rows),
                             txt_fmt=part3_fmt)
        # residual statistics are placed to the right of the model statistics
        part3L.extend_right(part3R)

        ########  Return Summary Tables ########
        if returns == 'text':
            return str(part1) + '\n' + str(part2) + '\n' + str(part3L)
        if returns == 'tables':
            return [part1, part2, part3L]
        if returns == 'csv':
            return part1.as_csv() + '\n' + part2.as_csv() + '\n' + \
                   part3L.as_csv()
        if returns == 'latex' or returns == 'html':
            print('not available yet')
예제 #18
0
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None):
    '''generate top table(s)

    Builds the general-information part of a summary from one or two
    label/value columns and returns them joined into a single table.

    Parameters
    ----------
    results : results instance
        ``results.model.__class__.__name__`` is used for the default title;
        the default items additionally read ``nobs``, ``df_model``,
        ``df_resid`` and ``llf`` from it
    title : string or None
        table title; if None, the model class name followed by
        ' Regression Results' is used
    gleft : list of (label, value) tuples or None
        rows of the left column. A value of None is replaced by the default
        entry for that label, unknown labels raise a KeyError. If gleft is
        None, a default left column is used and `gright` is ignored.
    gright : list of (label, value) tuples or None
        rows of the right column, same conventions as `gleft`
    yname : string or None
    xname : list of strings or None
        both are passed through `_getnames`

    Returns
    -------
    general_table : SimpleTable instance
        the left table, extended to the right by the right table if there
        is one

    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    #time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    #create dictionary with default
    #use lambdas because some values raise exception if they are not available
    #alternate spellings are commented out to force unique labels
    default_items = dict([
          ('Dependent Variable:', lambda: [yname]),
          ('Dep. Variable:', lambda: [yname]),
          ('Model:', lambda: [results.model.__class__.__name__]),
          #('Model type:', lambda: [results.model.__class__.__name__]),
          ('Date:', lambda: [date]),
          ('Time:', lambda: time_of_day),
          ('Number of Obs:', lambda: [results.nobs]),
          #('No. of Observations:', lambda: ["%#6d" % results.nobs]),
          ('No. Observations:', lambda: ["%#6d" % results.nobs]),
          #('Df model:', lambda: [results.df_model]),
          ('Df Model:', lambda: ["%#6d" % results.df_model]),
          #TODO: check when we have non-integer df
          ('Df Residuals:', lambda: ["%#6d" % results.df_resid]),
          #('Df resid:', lambda: [results.df_resid]),
          #('df resid:', lambda: [results.df_resid]), #check capitalization
          ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]) #doesn't exist for RLM - exception
          #('Method:', lambda: [???]), #no default for this
          ])

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        #default: General part of the summary table, Applicable to all? models
        #every label used here has to be a key of default_items
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]

        try:
            results.llf  #only checks availability, the value is formatted below
        except Exception:  #e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))

        gen_right = []
    else:
        #work on copies so that the lists of the caller are never modified
        gen_left = list(gleft)
        gen_right = list(gright) if gright else []

    gen_title = title
    gen_header = None

    def fill_defaults(rows):
        #replace missing (None) values with default values
        filled = []
        for item, value in rows:
            if value is None:
                value = default_items[item]()  #let KeyErrors raise exception
            filled.append((item, value))
        return filled

    gen_left = fill_defaults(gen_left)
    gen_right = fill_defaults(gen_right)

    #check
    missing_values = [k for k,v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    #pad both tables to equal number of rows
    if gen_right:
        n_short = len(gen_left) - len(gen_right)
        if n_short > 0:
            #fill up with blank lines to same length
            gen_right += [(' ', ' ')] * n_short
        elif n_short < 0:
            #fill up with blank lines to same length, just to keep it symmetric
            gen_left += [(' ', ' ')] * (-n_short)

        #padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  '+k), v) for k,v in gen_right]

        #transpose row col; zip also handles a single row correctly
        gen_stubs_right, gen_data_right = zip(*gen_right)
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title = gen_title,
                                      txt_fmt = fmt_2cols #gen_fmt
                                      )
    else:
        gen_table_right = []  #because .extend_right seems works with []


    #moved below so that we can pad if needed to match length of gen_right
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip(*gen_left)

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title = gen_title,
                                 txt_fmt = fmt_2cols
                                 )


    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table #, gen_table_left, gen_table_right
예제 #19
0
def summary(self,
            yname=None,
            xname=None,
            title=0,
            alpha=.05,
            returns='text',
            model_info=None):
    """Summarize the fit as a general table followed by a parameter table.

    Parameters
    -----------
    yname : string
            optional, default is `self.model.endog_names`, or 'y' if the
            model does not have that attribute
    xname : list of strings
            optional, default is `self.model.exog_names`, or `var_#` for #
            in range(number of params) if the model does not have that
            attribute
    title : string
            optional, the default (0) uses a description that is looked up
            by the class name of the model
    alpha : float
            passed to `self.conf_int`, also shown in the header of the
            confidence interval column
    returns : string
              'text' (default) or 'print' : the summary is returned as a
              string
              'table', 'csv', 'latex', 'html' : not implemented yet, None
              is returned
    model_info : None
            currently not used

    Returns
    -------
    summary : string or None
            the general table and the parameter table separated by a
            newline if `returns` is 'text' or 'print', otherwise None

    Raises
    ------
    KeyError
            if the class name of the model is not one of 'OLS', 'GLS',
            'GLSAR', 'WLS', 'RLM', 'GLM'

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    import time

    #TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {
        'OLS': 'Ordinary least squares',
        'GLS': 'Generalized least squares',
        'GLSAR': 'Generalized least squares with AR(p)',
        'WLS': 'Weigthed least squares',
        'RLM': 'Robust linear model',
        'GLM': 'Generalized linear model'
    }
    modeltype = self.model.__class__.__name__
    if title == 0:
        title = model_types[modeltype]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    #header names for the parameter table, look up by modeltype
    alp = str((1 - alpha) * 100) + '%'
    t_header = ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
    z_header = ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']
    param_header = {
        'OLS': t_header,
        'GLS': t_header,
        'GLSAR': t_header,
        'WLS': t_header,
        'GLM': t_header,  #glm uses t-distribution
        'RLM': z_header  #checke z
    }[modeltype]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    #a table row has to be a sequence of cells, a plain string would be
    #split into its characters
    try:
        string_types = basestring  #Python 2
    except NameError:
        string_types = str
    yname_row = [yname] if isinstance(yname, string_types) else yname

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    gen_left = [
        ('Model type:', [modeltype]),
        ('Date:', [date]),
        ('Dependent Variable:', yname_row),  #What happens with multiple names?
        ('df model', [self.df_model])
    ]
    #NOTE(review): the 'Method:' row shows the model class name (was dist
    #family), there is no per-model method description yet
    gen_right = [
        ('Method:', [modeltype]),
        ('Time:', time_of_day),
        ('Number of Obs:', [self.nobs]),
        ('df resid', [self.df_resid])
    ]
    gen_header = None

    gen_stubs_left, gen_data_left = zip(*gen_left)  #transpose row col
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right, gen_data_right = zip(*gen_right)
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    #names are standardized across models, so no lookup by modeltype needed
    params = self.params
    std_err = self.bse
    tstat = self.t()
    prob_stat = self.pvalues
    conf_int = self.conf_int(alpha)
    exog_idx = range(len(xname))

    # Simpletable should be able to handle the formating
    params_data = list(zip(["%#6.4g" % (params[i]) for i in exog_idx],
                           ["%#6.4f" % (std_err[i]) for i in exog_idx],
                           ["%#6.4f" % (tstat[i]) for i in exog_idx],
                           ["%#6.4f" % (prob_stat[i]) for i in exog_idx],
                           ["(%#5g, %#5g)" % tuple(conf_int[i])
                            for i in exog_idx]))
    parameter_table = SimpleTable(
        params_data,
        param_header,
        xname,
        title=None,
        txt_fmt=fmt_2,  #gen_fmt,
    )

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model
    if returns in ('text', 'print'):
        return str(general_table) + '\n' + str(parameter_table)
    #'table', 'csv', 'latex' and 'html' are not available yet
    return None
예제 #20
0
# Fit with all regressors; the params go into the first column of resparams.
res = sm.OLS(data.endog, data.exog).fit()
resparams[:, 0] = res.params

# Refit with one of the first six columns of exog left out at a time.
# Column i + 1 holds the params of the fit without column i; the row of the
# dropped column is not written (presumably it stays NaN from the
# initialization of resparams - confirm against the code above).
indall = list(range(7))  # a real list, so it can be sliced on Python 3 as well
for i in range(6):
    ind = indall[:i] + indall[i + 1:]
    res = sm.OLS(data.endog, data.exog[:, ind]).fit()
    resparams[ind, i + 1] = res.params

# rescale == 1 needs no adjustment
if rescale == 3:
    resparams[:-1, :] *= rescale_ratio[:, None]

numformat = '%10.4f'
txt_fmt1 = dict(data_fmts=[numformat])
rowstubs = data.names[1:] + ['const']
headers = ['all'] + ['drop %s' % name for name in data.names[1:]]
tabl = SimpleTable(resparams, headers, rowstubs, txt_fmt=txt_fmt1)

# Replacement for formatted NaN cells: blanks of the same width with a
# single '-' in the middle.
nanstring = numformat % np.nan
nn = len(nanstring)
nanrep = ' ' * (nn - 1)
nanrep = nanrep[:nn // 2] + '-' + nanrep[nn // 2:]

# single-argument print calls behave the same on Python 2 and Python 3
print('Longley data - sensitivity to dropping an explanatory variable')
#print(tabl)
print(str(tabl).replace(nanstring, nanrep))
예제 #21
0
def summary_params(results,
                   yname=None,
                   xname=None,
                   alpha=.05,
                   use_t=True,
                   skip_header=False):
    '''create a summary table for the parameters

    Parameters
    ----------
    results : results instance or tuple
        some required information is directly taken from the result
        instance. For multivariate endog a tuple
        ``(results, params, std_err, tvalues, pvalues, conf_int)`` can be
        given instead; conf_int is then used as given and `alpha` only
        shows up in the header label.
    yname : string or None
        optional name for the endogenous variable, passed to `_getnames`
    xname : list of strings or None
        optional names for the exogenous variables, passed to `_getnames`
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        selects the header labels: 't' and 'P>|t|' if True, 'z' and
        'P>|z|' if False
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.

    Returns
    -------
    params_table : SimpleTable instance
    '''

    #Parameters part of the summary table
    #------------------------------------
    #Note: this is not necessary since we standardized names, only t versus normal

    if isinstance(results, tuple):
        #for multivariate endog
        #TODO: check whether I don't want to refactor this
        #we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  #is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    #header names for the parameter part of the summary table
    if skip_header:
        param_header = None
    else:
        alp = str((1 - alpha) * 100) + '%'
        stat = 't' if use_t else 'z'
        param_header = [
            'coef', 'std err', stat, 'P>|' + stat + '|',
            '[' + alp + ' Conf. Int.]'
        ]

    _, xname = _getnames(results, yname=yname, xname=xname)

    params_stubs = xname

    exog_idx = range(len(xname))

    #center confidence intervals if they are unequal lengths
    confint = ["%s %s" % tuple(map(forg, conf_int[i])) for i in exog_idx]
    #a list, because it is scanned twice (max and min)
    len_ci = [len(ci) for ci in confint]
    max_ci = max(len_ci)
    min_ci = min(len_ci)

    if min_ci < max_ci:
        confint = [ci.center(max_ci) for ci in confint]

    #explicit f/g formatting, now uses forg, f or g depending on values
    #materialized as a list so the rows do not depend on a one-shot iterator
    params_data = list(zip(
        [forg(params[i], prec=4) for i in exog_idx],
        [forg(std_err[i]) for i in exog_idx],
        [forg(tvalues[i]) for i in exog_idx],
        ["%#6.3f" % (pvalues[i]) for i in exog_idx],
        confint
    ))
    parameter_table = SimpleTable(
        params_data,
        param_header,
        params_stubs,
        title=None,
        txt_fmt=fmt_params  #gen_fmt #fmt_2, #gen_fmt,
    )

    return parameter_table
예제 #22
0
def summary_top(results,
                title=None,
                gleft=None,
                gright=None,
                yname=None,
                xname=None):
    '''generate top table(s)

    Builds the general-information part of a summary from one or two
    label/value columns and returns them joined into a single table.

    Parameters
    ----------
    results : results instance
        ``results.model.__class__.__name__`` is used for the default title;
        the default items additionally read ``nobs``, ``df_model``,
        ``df_resid`` and ``llf`` from it
    title : string or None
        table title; if None, the model class name followed by
        ' Regression Results' is used
    gleft : list of (label, value) tuples or None
        rows of the left column. A value of None is replaced by the default
        entry for that label, unknown labels raise a KeyError. If gleft is
        None, a default left column is used and `gright` is ignored.
    gright : list of (label, value) tuples or None
        rows of the right column, same conventions as `gleft`
    yname : string or None
    xname : list of strings or None
        both are passed through `_getnames`

    Returns
    -------
    general_table : SimpleTable instance
        the left table, extended to the right by the right table if there
        is one

    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    #time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    #create dictionary with default
    #use lambdas because some values raise exception if they are not available
    #alternate spellings are commented out to force unique labels
    default_items = dict([
        ('Dependent Variable:', lambda: [yname]),
        ('Dep. Variable:', lambda: [yname]),
        ('Model:', lambda: [results.model.__class__.__name__]),
        #('Model type:', lambda: [results.model.__class__.__name__]),
        ('Date:', lambda: [date]),
        ('Time:', lambda: time_of_day),
        ('Number of Obs:', lambda: [results.nobs]),
        #('No. of Observations:', lambda: ["%#6d" % results.nobs]),
        ('No. Observations:', lambda: ["%#6d" % results.nobs]),
        #('Df model:', lambda: [results.df_model]),
        ('Df Model:', lambda: ["%#6d" % results.df_model]),
        #TODO: check when we have non-integer df
        ('Df Residuals:', lambda: ["%#6d" % results.df_resid]),
        #('Df resid:', lambda: [results.df_resid]),
        #('df resid:', lambda: [results.df_resid]), #check capitalization
        ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]
         )  #doesn't exist for RLM - exception
        #('Method:', lambda: [???]), #no default for this
    ])

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        #default: General part of the summary table, Applicable to all? models
        #every label used here has to be a key of default_items
        gen_left = [('Dep. Variable:', None), ('Model:', None),
                    ('Date:', None), ('No. Observations:', None),
                    ('Df Model:', None), ('Df Residuals:', None)]

        try:
            results.llf  #only checks availability, the value is formatted below
        except Exception:  #e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))

        gen_right = []
    else:
        #work on copies so that the lists of the caller are never modified
        gen_left = list(gleft)
        gen_right = list(gright) if gright else []

    gen_title = title
    gen_header = None

    def fill_defaults(rows):
        #replace missing (None) values with default values
        filled = []
        for item, value in rows:
            if value is None:
                value = default_items[item]()  #let KeyErrors raise exception
            filled.append((item, value))
        return filled

    gen_left = fill_defaults(gen_left)
    gen_right = fill_defaults(gen_right)

    #check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    #pad both tables to equal number of rows
    if gen_right:
        n_short = len(gen_left) - len(gen_right)
        if n_short > 0:
            #fill up with blank lines to same length
            gen_right += [(' ', ' ')] * n_short
        elif n_short < 0:
            #fill up with blank lines to same length, just to keep it symmetric
            gen_left += [(' ', ' ')] * (-n_short)

        #padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  ' + k), v) for k, v in gen_right]

        #transpose row col; zip also handles a single row correctly
        gen_stubs_right, gen_data_right = zip(*gen_right)
        gen_table_right = SimpleTable(
            gen_data_right,
            gen_header,
            gen_stubs_right,
            title=gen_title,
            txt_fmt=fmt_2cols  #gen_fmt
        )
    else:
        gen_table_right = []  #because .extend_right seems works with []

    #moved below so that we can pad if needed to match length of gen_right
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip(*gen_left)

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table  #, gen_table_left, gen_table_right
예제 #23
0
    def summary(self, yname=None, xname=None, returns='text'):
        """Summarize the regression results in three tables.

        The first table holds general information (names, date, time,
        number of observations, degrees of freedom), the second one the
        parameter estimates and the third one model and residual
        statistics side by side.

        Parameters
        -----------
        yname : string, optional
            Default is `self.model.endog_names`
        xname : list of strings, optional
            Default is `self.model.exog_names`
        returns : string, optional
            'text' (default) : the three tables as one string
            'tables' : list with the three SimpleTable instances
            'csv' : the three tables as one csv string
            'latex', 'html' : prints 'not available yet', returns None
            For any other value None is returned.

        Returns
        -------
        String, list of tables or None, depending on `returns`.

        Examples
        --------
        >>> import scikits.statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> ols_results = sm.OLS(data.endog, data.exog).results
        >>> print ols_results.summary()
        ...

        Notes
        -----
        All residual statistics are computed from `self.wresid`. The
        parameter table is also stored as `self.summary2`.
        """
        import time
        from scikits.statsmodels.iolib.table import SimpleTable
        from scikits.statsmodels.stats.stattools import (jarque_bera,
                omni_normtest, durbin_watson)

        if yname is None:
            yname = self.model.endog_names
        if xname is None:
            xname = self.model.exog_names
        model_name = self.model.__class__.__name__

        loglike = self.llf
        aic_value = self.aic
        bic_value = self.bic
        jb_stat, jb_pvalue, resid_skew, resid_kurt = jarque_bera(self.wresid)
        omni_stat, omni_pvalue = omni_normtest(self.wresid)

        now = time.localtime()

        # text formats for the three tables
        general_fmt = {
            'data_fmts': ["%s"],
            'empty_cell': '',
            'colwidths': 15,
            'colsep': ' ',
            'row_pre': '| ',
            'row_post': '|',
            'table_dec_above': '=',
            'table_dec_below': '',
            'header_dec_below': None,
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': "r",
            'stubs_align': "l",
            'fmt': 'txt',
        }
        params_fmt = {
            'data_fmts': ["%#10.4g", "%#10.4g", "%#6.4f", "%#6.4f"],
            'empty_cell': '',
            'colwidths': 14,
            'colsep': ' ',
            'row_pre': '| ',
            'row_post': ' |',
            'table_dec_above': '=',
            'table_dec_below': '=',
            'header_dec_below': '-',
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': 'r',
            'stubs_align': 'l',
            'fmt': 'txt',
        }
        stats_fmt = {
            'data_fmts': ["%#10.4g", "%#10.4g", "%#10.4g", "%#6.4g"],
            'empty_cell': '',
            'colwidths': 15,
            'colsep': '   ',
            'row_pre': '| ',
            'row_post': '  |',
            'table_dec_above': None,
            'table_dec_below': '-',
            'header_dec_below': '-',
            'header_fmt': '%s',
            'stub_fmt': '%s',
            'title_align': 'c',
            'header_align': 'r',
            'data_aligns': 'r',
            'stubs_align': 'l',
            'fmt': 'txt',
        }

        # ---- table 1: general information, one (label, value) per row
        general_rows = [
            ('Dependent Variable:', yname),
            ('Model:', model_name),
            ('Method:', 'Least Squares'),
            ('Date:', time.strftime("%a, %d %b %Y", now)),
            ('Time:', time.strftime("%H:%M:%S", now)),
            ('# obs:', self.nobs),
            ('Df residuals:', self.df_resid),
            ('Df model:', self.df_model),
        ]
        general_stubs = tuple([label for label, _value in general_rows])
        general_data = [[value] for _label, value in general_rows]
        general_table = SimpleTable(general_data,
                                    None,
                                    general_stubs,
                                    title='Summary of Regression Results',
                                    txt_fmt=general_fmt)

        # ---- table 2: parameter estimates, one row per name in xname
        positions = range(len(xname))
        params_data = zip([self.params[k] for k in positions],
                          [self.bse[k] for k in positions],
                          [self.tvalues[k] for k in positions],
                          [self.pvalues[k] for k in positions])
        params_table = SimpleTable(
            params_data,
            ('coefficient', 'std. error', 't-statistic', 'prob.'),
            xname,
            title=None,
            txt_fmt=params_fmt)

        # keep the parameter table available on the instance
        self.summary2 = params_table

        # ---- table 3: model statistics (left) and residual statistics (right)
        model_rows = [
            ('R-squared:', self.rsquared),
            ('Adjusted R-squared:', self.rsquared_adj),
            ('F-statistic:', self.fvalue),
            ('Prob (F-statistic):', self.f_pvalue),
            ('Log likelihood:', loglike),
            ('AIC criterion:', aic_value),
            ('BIC criterion:', bic_value),
        ]
        resid_rows = [
            ('Durbin-Watson:', durbin_watson(self.wresid)),
            ('Omnibus:', omni_stat),
            ('Prob(Omnibus):', omni_pvalue),
            ('JB:', jb_stat),
            ('Prob(JB):', jb_pvalue),
            ('Skew:', resid_skew),
            ('Kurtosis:', resid_kurt),
        ]
        stats_table = SimpleTable(
            [[value] for _label, value in model_rows],
            ['Models stats'],
            tuple([label for label, _value in model_rows]),
            txt_fmt=stats_fmt)
        resid_table = SimpleTable(
            [[value] for _label, value in resid_rows],
            ['Residual stats'],
            tuple([label for label, _value in resid_rows]),
            txt_fmt=stats_fmt)
        stats_table.extend_right(resid_table)

        # ---- output in the requested form
        if returns == 'text':
            return '\n'.join([str(general_table), str(params_table),
                              str(stats_table)])
        if returns == 'tables':
            return [general_table, params_table, stats_table]
        if returns == 'csv':
            return '\n'.join([general_table.as_csv(), params_table.as_csv(),
                              stats_table.as_csv()])
        if returns in ('latex', 'html'):
            print('not available yet')
예제 #24
0
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """Summarize a fitted model as a general-info table plus a parameter table.

    Two ``SimpleTable`` objects are built: a "general" table (model type,
    date, dependent variable, degrees of freedom, method, time, number of
    observations) and a "parameter" table (coefficient, standard error,
    test statistic, p-value and confidence interval for each regressor).

    Parameters
    ----------
    yname : string, optional
        Name of the dependent variable.  Defaults to
        ``self.model.endog_names`` and, if that attribute is missing, ``'y'``.
    xname : list of strings, optional
        Names of the regressors.  Defaults to ``self.model.exog_names`` and,
        if that attribute is missing, ``var_0, var_1, ...`` (one per entry of
        ``self.params``).
    title : string, optional
        Title of the general table.  The default sentinel ``0`` selects a
        description based on the class name of ``self.model``.
    alpha : float in (0, 1), optional
        Passed to ``self.conf_int``; the confidence-interval column is
        labelled with ``(1 - alpha) * 100`` percent.
    returns : string, optional
        Selects the output format:

        * ``'text'`` (default) or ``'print'`` : both tables rendered with
          ``str`` and joined by a newline.
        * ``'table'`` or ``'tables'`` : the list
          ``[general_table, parameter_table]``.
        * ``'csv'`` : both tables rendered with ``as_csv`` and joined by a
          newline.
        * anything else (including ``'latex'`` and ``'html'``, which are not
          implemented yet) : ``None``.
    model_info : optional
        Currently unused.

    Returns
    -------
    string, list of SimpleTable, or None
        See `returns`.

    Raises
    ------
    KeyError
        If the class name of ``self.model`` is not one of ``'OLS'``,
        ``'GLS'``, ``'GLSAR'``, ``'WLS'``, ``'RLM'`` or ``'GLM'``.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    The confidence intervals are whatever ``self.conf_int(alpha)`` returns.
    """
    import time

    # TODO: make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weigthed least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model',
                   }

    modeltype = self.model.__class__.__name__
    if title == 0:
        title = model_types[modeltype]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    # General part of the summary table, applicable to all(?) models
    # --------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    gen_header = None
    gen_stubs_left = ('Model type:',
                      'Date:',
                      'Dependent Variable:',
                      'df model',
                      )
    gen_data_left = ([modeltype],
                     [date],
                     # NOTE(review): yname is passed bare while every other
                     # row is a one-element list; presumably SimpleTable
                     # iterates each row, so a multi-character string may be
                     # split into several cells -- confirm.
                     yname,
                     [df_model],
                     )
    general_table = SimpleTable(gen_data_left,
                                gen_header,
                                gen_stubs_left,
                                title=title,
                                txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid',
                       )
    gen_data_right = ([modeltype],  # was dist family, need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid],
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=title,
                                  txt_fmt=gen_fmt)
    # Glue the right-hand columns onto the left-hand table in place.
    general_table.extend_right(gen_table_right)

    # Parameters part of the summary table
    # ------------------------------------
    # Every supported model exposes the same accessors, so evaluate each one
    # exactly once; only the column labels (t versus z) depend on the model.
    tstat = self.t()
    prob_stat = self.pvalues
    alp = str((1-alpha)*100)+'%'
    t_header = ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
    z_header = ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']
    param_header = {'OLS': t_header,
                    'GLS': t_header,
                    'GLSAR': t_header,
                    'WLS': t_header,
                    'GLM': t_header,  # glm uses t-distribution
                    'RLM': z_header,  # check z
                    }
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse

    # SimpleTable should be able to handle the formatting; until then the
    # cells are pre-formatted here.  A real list (not a lazy zip) is built so
    # the rows survive being iterated more than once on Python 3.
    params_data = [("%#6.4g" % (params[i]),
                    "%#6.4f" % (std_err[i]),
                    "%#6.4f" % (tstat[i]),
                    "%#6.4f" % (prob_stat[i]),
                    "(%#5g, %#5g)" % tuple(conf_int[i]))
                   for i in range(len(xname))]
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  xname,
                                  title=None,
                                  txt_fmt=fmt_2)

    # special table
    # -------------
    # TODO: exists in linear_model, what about other models
    # residual diagnostics

    # Output options
    # --------------
    # 'text' is the default value of `returns`, so it has to produce the
    # rendered summary just like 'print' does.
    if returns in ('text', 'print'):
        return str(general_table) + '\n' + str(parameter_table)
    if returns in ('table', 'tables'):
        return [general_table, parameter_table]
    if returns == 'csv':
        return general_table.as_csv() + '\n' + parameter_table.as_csv()
    # 'latex', 'html' and unrecognised values: not available yet.
    return None