Esempio n. 1
0
    def test_SimpleTable_3(self):
        """ Test SimpleTable.extend() as in extend down"""
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        self.assertEqual(desired, str(actual))
Esempio n. 2
0
    def summary_table(self, float_fmt="%6.3f"):
        '''Assemble all influence and outlier measures into one table.

        No distinction is made (yet) between statistics that can be computed
        from the original regression results and those that need a
        leave-one-observation-out loop.

        Parameters
        ----------
        float_fmt : string
            format applied to every column except the first one (the
            observation index, which is formatted with "%4d")

        Returns
        -------
        res : SimpleTable instance
           SimpleTable instance with the results, can be printed

        Notes
        -----
        As a side effect the stacked data array is stored on the instance
        as ``table_data``.
        '''
        # column headers and the matching column values, kept in parallel
        colnames = (
            'obs',
            'endog',
            'fitted\nvalue',
            "Cook's\nd",
            "student.\nresidual",
            'hat diag',
            'dffits \ninternal',
            "ext.stud.\nresidual",
            'dffits',
            'dfbeta\nslope',
        )
        columns = (
            np.arange(self.nobs),
            self.endog,
            self.results.fittedvalues,
            self.cooks_distance[0],
            self.resid_studentized_internal,
            self.hat_matrix_diag,
            self.dffits_internal[0],
            self.resid_studentized_external,
            self.dffits[0],
            # only column 1 of dfbetas is shown
            # (original note: "skip needs to partially unravel")
            self.dfbetas[:, 1],
        )
        data = np.column_stack(columns)
        self.table_data = data

        from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
        from gwstatsmodels.iolib.tableformatting import fmt_base
        from copy import deepcopy

        # work on copies so the shared format templates are not modified
        txt_format = deepcopy(fmt_base)
        html_format = deepcopy(default_html_fmt)
        n_float_cols = data.shape[1] - 1
        txt_format['data_fmts'] = ["%4d"] + [float_fmt] * n_float_cols
        return SimpleTable(data,
                           headers=colnames,
                           txt_fmt=txt_format,
                           html_fmt=html_format)
Esempio n. 3
0
    def test_SimpleTable_1(self):
        """Basic test, test_SimpleTable_1"""
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999],[1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data, test1header, test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        self.assertEqual(desired, str(actual))
Esempio n. 4
0
    def summary_quantiles(self,
                          idx,
                          distppf,
                          frac=None,
                          varnames=None,
                          title=None):
        '''summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            function of the reference distribution that is evaluated at the
            (transposed, 2d) quantile probabilities; presumably the percent
            point function (inverse cdf), as the name suggests
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : None or array_like, float
            probabilities for which the quantiles are tabulated. If None,
            ``[0.01, 0.025, 0.05, 0.1, 0.975]`` is used.
        varnames : None, or list of strings
            optional list of variable names, same length as idx
        title : None or string
            optional prefix for the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print table` to see results

        '''
        if frac is None:
            # built per call instead of a shared mutable default argument
            frac = [0.01, 0.025, 0.05, 0.1, 0.975]
        idx = np.atleast_1d(idx)  #assure iterable, use list ?

        quant, mcq = self.quantiles(idx, frac=frac)
        #not sure whether this will work with single quantile
        crit = distppf(np.atleast_2d(quant).T)

        # interleave Monte Carlo and reference-distribution columns,
        # two columns per selected variable
        mml = []
        for i in range(len(idx)):
            mml.extend([mcq[:, i], crit[:, i]])
        mmlar = np.column_stack([quant] + mml)

        if title:
            title = title + ' Quantiles (critical values)'
        else:
            title = 'Quantiles (critical values)'
        #TODO use stub instead
        if varnames is None:
            # one probability column plus two columns per variable
            varnames = ['var%d' % i for i in range(mmlar.shape[1] // 2)]
        headers = ['\nprob'] + [
            '%s\n%s' % (i, t) for i in varnames for t in ['mc', 'dist']
        ]
        return SimpleTable(mmlar,
                           txt_fmt={
                               'data_fmts':
                               ["%#6.3f"] + ["%#10.4f"] * (mmlar.shape[1] - 1)
                           },
                           title=title,
                           headers=headers)
Esempio n. 5
0
    def test_customlabel(self):
        """Limited test of custom custom labeling"""
        if has_numpy:
            tbl = SimpleTable(table1data, test1header, test1stubs, txt_fmt=txt_fmt1)
            tbl[1][1].data = np.nan
            tbl.label_cells(custom_labeller)
            # print([[c.datatype for c in row] for row in tbl])
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    --   *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = "\n%s\n" % tbl.as_text(missing="--")
            # print(actual)
            # print(desired)
            self.assertEqual(actual, desired)
Esempio n. 6
0
    def test_SimpleTable_1(self):
        """Basic test, test_SimpleTable_1"""
        desired = '''
=====================
      header1 header2
---------------------
stub1 1.30312 2.73999
stub2 1.95038 2.65765
---------------------
'''
        test1data = [[1.30312, 2.73999], [1.95038, 2.65765]]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        actual = SimpleTable(test1data,
                             test1header,
                             test1stubs,
                             txt_fmt=default_txt_fmt)
        actual = '\n%s\n' % actual.as_text()
        self.assertEqual(desired, str(actual))
Esempio n. 7
0
    def test_SimpleTable_3(self):
        """ Test SimpleTable.extend() as in extend down"""
        desired = '''
==============================
           header s1 header d1
------------------------------
stub R1 C1  10.30312  10.73999
stub R2 C1  90.30312  90.73999
           header s2 header d2
------------------------------
stub R1 C2  50.95038  50.65765
stub R2 C2  40.95038  40.65765
------------------------------
'''
        data1 = [[10.30312, 10.73999], [90.30312, 90.73999]]
        data2 = [[50.95038, 50.65765], [40.95038, 40.65765]]
        stubs1 = ['stub R1 C1', 'stub R2 C1']
        stubs2 = ['stub R1 C2', 'stub R2 C2']
        header1 = ['header s1', 'header d1']
        header2 = ['header s2', 'header d2']
        actual1 = SimpleTable(data1, header1, stubs1, txt_fmt=default_txt_fmt)
        actual2 = SimpleTable(data2, header2, stubs2, txt_fmt=default_txt_fmt)
        actual1.extend(actual2)
        actual = '\n%s\n' % actual1.as_text()
        self.assertEqual(desired, str(actual))
Esempio n. 8
0
    def summary_cdf(self, idx, frac, crit, varnames=None, title=None):
        '''summary table for cumulative density function


        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            probabilities shown in the first column of the table
        crit : array_like
            values for which cdf is calculated; column ``i`` is used for
            ``idx[i]``
        varnames : None, or list of strings
            optional list of variable names, same length as idx
        title : None or string
            optional prefix for the table title

        Returns
        -------
        table : instance of SimpleTable
            use `print table` to see results


        '''
        idx = np.atleast_1d(idx)  #assure iterable, use list ?

        #TODO:need broadcasting in cdf
        # one cdf column per selected variable, evaluated at its own
        # column of critical values
        mml = [self.cdf(crit[:, i], [idx[i]])[1].ravel()
               for i in range(len(idx))]
        mmlar = np.column_stack([frac] + mml)

        if title:
            title = title + ' Probabilities'
        else:
            title = 'Probabilities'
        #TODO use stub instead

        n_var_cols = mmlar.shape[1] - 1
        if varnames is None:
            varnames = ['var%d' % i for i in range(n_var_cols)]
        # list() so that tuples or arrays of names are accepted as well
        headers = ['prob'] + list(varnames)
        # one format for the probability column and one per variable column;
        # the count has to follow the table columns, not the number of rows
        data_fmts = ["%#6.3f"] + ["%#10.4f"] * n_var_cols
        return SimpleTable(mmlar,
                           txt_fmt={'data_fmts': data_fmts},
                           title=title,
                           headers=headers)
Esempio n. 9
0
    def test_customlabel(self):
        """Limited test of custom custom labeling"""
        if has_numpy:
            tbl = SimpleTable(table1data,
                              test1header,
                              test1stubs,
                              txt_fmt=txt_fmt1)
            tbl[1][1].data = np.nan
            tbl.label_cells(custom_labeller)
            #print([[c.datatype for c in row] for row in tbl])
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    --   *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = '\n%s\n' % tbl.as_text(missing='--')
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)
Esempio n. 10
0
 def summary_proc(self, g):
     """
     For internal use

     Prints a SimpleTable with the columns 'Time', 'Survival' and
     'Std. Err' for entry ``g``, built from ``self.ts[g]`` and
     ``self.results[g]``.

     Parameters
     ----------
     g : index or key
         selects the entry of ``self.ts``, ``self.results`` and, if exog
         is present, ``self.groups`` that is summarized
     """
     # identity test: `!= None` on an ndarray compares elementwise and has
     # no unambiguous truth value
     if self.exog is not None:
         myTitle = ('exog = ' + str(self.groups[g]) + '\n')
     else:
         myTitle = "Kaplan-Meier Curve"
     table = np.transpose(self.results[g])
     # prepend the times as first column
     table = np.c_[np.transpose(self.ts[g]), table]
     table = SimpleTable(table, headers=['Time', 'Survival', 'Std. Err'],
                         title=myTitle)
     print(table)
Esempio n. 11
0
    def summary_find_nfact(self):
        '''text summary for the selection of the number of factors

        Runs ``fit_find_nfact`` first if ``results_find_nfact`` has not been
        attached to the instance yet.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()
        results = self.results_find_nfact

        # best k per criterion, aligned under the column names of the header
        best_line = ' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact)

        from gwstatsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                default_latex_fmt, default_html_fmt)

        column_names = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
        column_fmts = ['%6d'] + ['%10.3f'] * 4
        table = SimpleTable(results, column_names, None,
                            txt_fmt=dict(data_fmts=column_fmts))

        lines = [
            'Best result for k, by AIC, BIC, R2_adj, L1O',
            best_line,
            "PCA regression on simulated data,",
            "DGP: 2 factors and 4 explanatory variables",
            table.__str__(),
            "Notes: k is number of components of PCA,",
            "       constant is added additionally",
            "       k=0 means regression on constant only",
            "       L1O: sum of squared prediction errors for leave-one-out",
        ]
        # every line is preceded by a newline, including the first one
        return ''.join('\n' + line for line in lines)
Esempio n. 12
0
    def summary_find_nfact(self):
        '''text summary for the selection of the number of factors

        Runs ``fit_find_nfact`` first if ``results_find_nfact`` has not been
        attached to the instance yet.

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()
        results = self.results_find_nfact

        parts = ['\n' + 'Best result for k, by AIC, BIC, R2_adj, L1O']
        # best k per criterion, aligned under the column names above
        parts.append('\n' + ' ' * 19
                     + '%5d %4d %6d %5d' % tuple(self.best_nfact))

        from gwstatsmodels.iolib.table import (SimpleTable, default_txt_fmt,
                                               default_latex_fmt,
                                               default_html_fmt)

        table_fmt = dict(data_fmts=['%6d'] + ['%10.3f'] * 4)
        table = SimpleTable(results,
                            'k, AIC, BIC, R2_adj, L1O'.split(', '),
                            None,
                            txt_fmt=table_fmt)

        parts.append('\n' + "PCA regression on simulated data,")
        parts.append('\n' + "DGP: 2 factors and 4 explanatory variables")
        parts.append('\n' + table.__str__())
        parts.append('\n' + "Notes: k is number of components of PCA,")
        parts.append('\n' + "       constant is added additionally")
        parts.append('\n' + "       k=0 means regression on constant only")
        parts.append('\n' + "       L1O: sum of squared prediction errors for leave-one-out")
        return ''.join(parts)
Esempio n. 13
0
    def print_summary(self, stats, orientation='auto'):
        '''create a table of summary statistics

        Parameters
        ----------
        stats : list of strings
            keys into ``self.univariate`` selecting the statistics to show
        orientation : string
            'varcols' puts the variables in columns; 'auto' does the same
            only if there are fewer variables than statistics; any other
            value keeps one row per variable

        Returns
        -------
        table : SimpleTable instance
            the table is returned, not printed
        '''
        #TODO: need to specify a table formatting for the numbers, using default
        title = 'Summary Statistics'
        header = stats
        stubs = self.univariate['obs'][1]
        n_vars = len(self.univariate['obs'][2])
        # one row per variable, one column per requested statistic
        data = [[self.univariate[astat][2][col] for astat in stats]
                for col in range(n_vars)]

        if (orientation == 'varcols') or \
           (orientation == 'auto' and len(stubs) < len(header)):
            #swap rows and columns; build a real list because map/zip are
            #lazy one-shot iterators on Python 3
            data = [list(row) for row in zip(*data)]
            header, stubs = stubs, header

        part_fmt = dict(data_fmts=["%#8.4g"] * (len(header) - 1))
        table = SimpleTable(data,
                            header,
                            stubs,
                            title=title,
                            txt_fmt=part_fmt)

        return table
Esempio n. 14
0
                title_align='r',
                header_align='r',
                data_aligns="r",
                stubs_align="l",
                fmt='txt')
# Module-level fixtures: a 2x2 table with mixed float/int data.
cell0data = 0.0000
cell1data = 1
row0data = [cell0data, cell1data]
row1data = [2, 3.333]
table1data = [row0data, row1data]
test1stubs = ('stub1', 'stub2')
test1header = ('header1', 'header2')
#test1header = ('header1\nheader1a', 'header2\nheader2a')
# Table built with one custom format per output type (text, LaTeX, HTML).
tbl = SimpleTable(table1data,
                  test1header,
                  test1stubs,
                  txt_fmt=txt_fmt1,
                  ltx_fmt=ltx_fmt1,
                  html_fmt=html_fmt1)


def custom_labeller(cell):
    """Return 'missing' if the cell's data is the ``np.nan`` object, else None.

    The check is by identity (``is``), so only the ``np.nan`` singleton
    itself is recognized, not every NaN value.
    """
    holds_nan_singleton = cell.data is np.nan
    return 'missing' if holds_nan_singleton else None


class test_Cell(unittest.TestCase):
    # Unit tests for Cell.
    def test_celldata(self):
        # Wrap each module-level datum in a Cell; the datatype alternates
        # 0, 1, 0, 1 with the position (i % 2).
        # NOTE(review): as visible here the cells are only constructed and
        # nothing is asserted - the snippet looks truncated, confirm against
        # the full file.
        celldata = cell0data, cell1data, row1data[0], row1data[1]
        cells = [
            Cell(datum, datatype=i % 2) for i, datum in enumerate(celldata)
        ]
Esempio n. 15
0
    def test_SimpleTable_4(self):
        """Basic test, test_SimpleTable_4
        test uses custom txt_fmt"""
        # Custom text format: '*' decoration lines, ' * ' column separator,
        # two-decimal floats in the first data column, integers in the second.
        txt_fmt1 = dict(data_fmts=['%3.2f', '%d'],
                        empty_cell=' ',
                        colwidths=1,
                        colsep=' * ',
                        row_pre='* ',
                        row_post=' *',
                        table_dec_above='*',
                        table_dec_below='*',
                        header_dec_below='*',
                        header_fmt='%s',
                        stub_fmt='%s',
                        title_align='r',
                        header_align='r',
                        data_aligns="r",
                        stubs_align="l",
                        fmt='txt')
        # LaTeX and HTML use copies of the default formats.
        ltx_fmt1 = default_latex_fmt.copy()
        html_fmt1 = default_html_fmt.copy()
        # 2x2 fixture with mixed float/int data.
        cell0data = 0.0000
        cell1data = 1
        row0data = [cell0data, cell1data]
        row1data = [2, 3.333]
        table1data = [row0data, row1data]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        tbl = SimpleTable(table1data,
                          test1header,
                          test1stubs,
                          txt_fmt=txt_fmt1,
                          ltx_fmt=ltx_fmt1,
                          html_fmt=html_fmt1)

        # NOTE(review): the functions below are nested inside this method and
        # are never called in the code shown, so none of their assertions
        # execute; this method only checks that the table can be constructed.
        # They look like they were meant to be separate test methods - confirm.
        def test_txt_fmt1(self):
            """Limited test of custom txt_fmt"""
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    0.00 *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = '\n%s\n' % tbl.as_text()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

            # NOTE(review): nested one level deeper and also never called; its
            # body only assigns `desired` and asserts nothing.
            def test_ltx_fmt1(self):
                """Limited test of custom ltx_fmt"""
                desired = r"""
\begin{tabular}{lcc}
\toprule
                        & \textbf{header1} & \textbf{header2}  \\
\midrule
\textbf{stub1} &       0.0        &        1          \\
\textbf{stub2} &        2         &      3.333        \\
\bottomrule
\end{tabular}
"""

            # NOTE(review): these lines belong to test_txt_fmt1, so the LaTeX
            # output would be compared with the text-format `desired` defined
            # above, not with the LaTeX string - looks like an indentation
            # slip, confirm.
            actual = '\n%s\n' % tbl.as_latex_tabular()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

        def test_html_fmt1(self):
            """Limited test of custom html_fmt"""
            desired = """
<table class="simpletable">
<tr>
    <td></td>    <th>header1</th> <th>header2</th>
</tr>
<tr>
  <th>stub1</th>   <td>0.0</td>      <td>1</td>
</tr>
<tr>
  <th>stub2</th>    <td>2</td>     <td>3.333</td>
</tr>
</table>
"""
            actual = '\n%s\n' % tbl.as_html()
            self.assertEqual(actual, desired)
Esempio n. 16
0
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
       regression results; ``fittedvalues``, ``resid``, ``mse_resid``,
       ``df_resid``, ``nobs`` and ``model.endog`` are used
    alpha : float
       significance level for confidence interval; applied to both the
       interval for the mean prediction and the interval for a predicted
       observation

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations)

    '''

    from scipy import stats
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci = np.column_stack([
        res.fittedvalues - tppf * predict_mean_se,
        res.fittedvalues + tppf * predict_mean_se
    ])

    #standard error for predicted observation
    #alpha is passed on so that both intervals use the same level
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(
        res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    data = np.column_stack([
        np.arange(res.nobs) + 1, res.model.endog, res.fittedvalues,
        predict_mean_se, predict_mean_ci[:, 0], predict_mean_ci[:, 1],
        predict_ci[:, 0], predict_ci[:, 1], res.resid, resid_se,
        infl.resid_studentized_internal, infl.cooks_distance[0]
    ])

    #confidence level shown in the column labels, '95%' for alpha=0.05;
    #%g drops floating point noise such as 95.00000000000001
    level = '%g%%' % ((1 - alpha) * 100)
    ss2 = [
        'Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
        'Std Error\nMean Predict',
        'Mean ci\n%s low' % level, 'Mean ci\n%s upp' % level,
        'Predict ci\n%s low' % level, 'Predict ci\n%s upp' % level,
        'Residual',
        'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"
    ]

    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy
    #work on copies so the shared format templates are not modified
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    #integer format for the observation number, floats for everything else
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
Esempio n. 17
0
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    Each row of ``result.params`` becomes one table row; the rows of the
    attributes named in ``extras`` (e.g. bse or tvalues) are interleaved
    below it, wrapped in parentheses.

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list of strings
        additional attributes to add below a parameter row, e.g. bse or tvalues
    endog_names : None or list of strings
        names for rows of the parameter array (multivariate endog)
    exog_names : None or list of strings
        names for columns of the parameter array (exog)
    title : None or string
        title passed on to SimpleTable

    Returns
    -------
    table : SimpleTable
        the table with the results interleaved for each row of the
        parameter array

    '''
    import copy

    if endog_names is None:
        #TODO: note the [1:] is specific to current MNLogit
        endog_levels = np.unique(result.model.endog)[1:]
        endog_names = ['endog_%d' % level for level in endog_levels]
    if exog_names is None:
        exog_names = ['var%d' % k for k in range(len(result.params))]

    #TODO: check formatting options with different values
    param_rows = [[forg(value, prec=4) for value in row]
                  for row in result.params]

    if not extras:  #None or empty
        data = param_rows
        stubs = endog_names
    else:
        #one formatted 2d list per extra attribute, values in parentheses
        extras_list = []
        for what in extras:
            formatted = [['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                          for v in col]
                         for col in getattr(result, what)]
            extras_list.append(formatted)

        #interleave: parameter row followed by its rows of extras
        data = []
        for group in zip(param_rows, *extras_list):
            data.extend(group)

        #only the parameter row gets a name, the extras get empty stubs
        blank_stubs = [[''] * len(endog_names)] * len(extras)
        stubs = []
        for group in zip(endog_names, *blank_stubs):
            stubs.extend(group)

    #all cells are preformatted strings, so every column uses "%s"
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt['data_fmts'] = ["%s"] * result.params.shape[1]
    return SimpleTable(data,
                       headers=exog_names,
                       stubs=stubs,
                       title=title,
                       txt_fmt=txt_fmt)
Esempio n. 18
0
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False):
    '''create a summary table for the parameters

    Parameters
    ----------
    results : results instance or tuple
        some required information is directly taken from the result
        instance. For multivariate endog a tuple
        ``(results, params, std_err, tvalues, pvalues, conf_int)`` can be
        given instead; the statistics are then taken from the tuple.
    yname : string or None
        optional name for the endogenous variable, default is "y"
    xname : list of strings or None
        optional names for the exogenous variables, default is "var_xx"
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        indicator whether the p-values are based on the Student-t
        distribution (if True) or on the normal distribution (if False)
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.

    Returns
    -------
    params_table : SimpleTable instance
    '''

    if isinstance(results, tuple):
        #for multivariate endog
        #TODO: check whether I don't want to refactor this
        #we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  #is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    #header names for the parameter part of the summary table
    alp = str((1-alpha)*100)+'%'
    if skip_header:
        param_header = None
    else:
        stat = 't' if use_t else 'z'
        param_header = ['coef', 'std err', stat, 'P>|%s|' % stat,
                        '[' + alp + ' Conf. Int.]']

    _, xname = _getnames(results, yname=yname, xname=xname)
    params_stubs = xname

    #range instead of xrange: works on Python 2 and 3
    exog_idx = range(len(xname))

    confint = ["%s %s" % tuple(map(forg, conf_int[i])) for i in exog_idx]
    if confint:
        #center confidence intervals if they are unequal lengths; lengths
        #are computed directly because a map object would be exhausted
        #after the first pass on Python 3 (centering is a no-op when all
        #strings already have the same length)
        max_ci = max(len(ci) for ci in confint)
        confint = [ci.center(max_ci) for ci in confint]

    #explicit f/g formatting, uses forg, f or g depending on values;
    #list() because zip is a lazy one-shot iterator on Python 3
    params_data = list(zip([forg(params[i], prec=4) for i in exog_idx],
                           [forg(std_err[i]) for i in exog_idx],
                           [forg(tvalues[i]) for i in exog_idx],
                           ["%#6.3f" % (pvalues[i]) for i in exog_idx],
                           confint))
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_params)

    return parameter_table
Esempio n. 19
0
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None):
    '''generate top table(s)

    Parameters
    ----------
    results : results instance
        ``results.model``, ``nobs``, ``df_model``, ``df_resid`` and ``llf``
        are read for the default entries
    title : None or string
        table title; by default built from the model class name
    gleft : None or list of (label, value) tuples
        entries of the left table. A value of None is replaced by the
        default for that label; an unknown label raises KeyError. If gleft
        is None, a default list is used and gright is ignored.
    gright : None or list of (label, value) tuples
        entries of the right table, handled like gleft; None or empty means
        no right table
    yname : string or None
        optional name for the endogenous variable
    xname : list of strings or None
        optional names for the exogenous variables

    Returns
    -------
    general_table : SimpleTable instance
        the left table, extended to the right by the right table if any

    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    import time

    #time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    #defaults are lambdas because some values raise an exception if they
    #are not available
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: ["%#6d" % results.nobs],
        'Df Model:': lambda: ["%#6d" % results.df_model],
        #TODO: check when we have non-integer df
        'Df Residuals:': lambda: ["%#6d" % results.df_resid],
        #doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
        }

    if title is None:
        title = results.model.__class__.__name__ + ' ' + 'Regression Results'

    if gleft is None:
        #default: General part of the summary table, Applicable to all? models
        #labels have to match the keys of default_items
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]

        try:
            results.llf
        except Exception:  #e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))

        gen_right = []
    else:
        gen_left = list(gleft)
        #None means "no right table"
        gen_right = [] if gright is None else list(gright)

    gen_title = title
    gen_header = None

    def _fill_defaults(items):
        #replace missing (None) values with default values
        filled = []
        for item, value in items:
            if value is None:
                value = default_items[item]()  #let KeyErrors raise exception
            filled.append((item, value))
        return filled

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    #check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    if gen_right:
        #pad both tables with blank lines to equal number of rows
        n_diff = len(gen_left) - len(gen_right)
        if n_diff > 0:
            gen_right += [(' ', ' ')] * n_diff
        elif n_diff < 0:
            #just to keep it symmetric
            gen_left += [(' ', ' ')] * (-n_diff)

        #padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right) #transpose row col
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  #because .extend_right seems works with []

    #done after the padding so that the left table matches the right one
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table
Esempio n. 20
0
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Build a summary of a fitted model out of two SimpleTable pieces.

    A general-information table (model type, date, dependent variable,
    degrees of freedom, method, time, number of observations) and a
    parameter table (coefficient, standard error, test statistic, p-value
    and confidence interval per regressor) are constructed. For
    ``returns='print'`` the two tables are converted to strings and joined
    with a newline.

    Parameters
    ----------
    yname : string
            optional, name of the dependent variable. Default is
            `self.model.endog_names`, or 'y' if the model has no such
            attribute.
    xname : list of strings
            optional, names of the regressors. Default is
            `self.model.exog_names`, or 'var_#' for # in
            range(len(self.params)) if the model has no such attribute.
    title : string
            optional, title of the general table. The default value 0 is a
            sentinel: the title is then looked up in `model_types` by the
            class name of `self.model`.
    alpha : float
            optional, passed to `self.conf_int`; the confidence interval
            column header shows (1-alpha)*100 followed by '%'.
    returns : string
              'print' is the only value that is handled by the code
              visible in this function. The values 'text', 'table', 'csv',
              'latex' and 'html' were listed by the original documentation
              but are not implemented in the visible code.
    model_info : optional
              not used by the code visible in this function.

    Returns
    -------
    returns='print'
            string consisting of the general table, a newline and the
            parameter table. The printer is selected by model class name
            and falls back to the OLS printer for unlisted names.

    any other value of `returns` (including the default 'text')
            no return statement is reached in the visible code.
            NOTE(review): confirm whether further branches are intended.

    Raises
    ------
    KeyError
            if the class name of `self.model` is not one of 'OLS', 'GLS',
            'GLSAR', 'WLS', 'RLM', 'GLM' (lookups in `model_types` when
            title is 0, and in `tstats`, `prob_stats` and `param_header`).

    Examples (needs updating)
    --------
    >>> import gwstatsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    The confidence intervals are whatever `self.conf_int(alpha)` returns.
    The original note states they are calculated from the normal
    distribution; that cannot be verified from this function.
    """
    # local import; only localtime() and strftime() are used below
    import time as time



    #TODO Make sure all self.model.__class__.__name__ are listed
    # model class name -> default table title (used when title == 0)
    model_types = {'OLS' : 'Ordinary least squares',
                   'GLS' : 'Generalized least squares',
                   'GLSAR' : 'Generalized least squares with AR(p)',
                   'WLS' : 'Weigthed least squares',
                   'RLM' : 'Robust linear model',
                   'GLM' : 'Generalized linear model'
                   }
    # NOTE(review): model_methods is not referenced again in the visible
    # part of this function (the 'Method:' row below uses modeltype).
    model_methods = {'OLS' : 'Least Squares',
                   'GLS' : 'Least Squares',
                   'GLSAR' : 'Least Squares',
                   'WLS' : 'Least Squares',
                   'RLM' : '?',
                   'GLM' : '?'
                   }
    # 0 is the "no title given" sentinel; unlisted model classes raise KeyError
    if title==0:
        title = model_types[self.model.__class__.__name__]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]
    # take one timestamp so that the date and time entries are consistent
    time_now = time.localtime()
    # wrapped in a list because it is used directly as a table data row
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    #dist_family = self.model.family.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    #TODO: define this generically, overwrite in model classes
    #replace definition of stubs data by single list
    #e.g.
    # (stub, data row) pairs for the left half of the general table;
    # note that yname is passed as is, not wrapped in a list like the others
    gen_left =   [('Model type:', [modeltype]),
                  ('Date:', [date]),
                  ('Dependent Variable:', yname), #What happens with multiple names?
                  ('df model', [df_model])
                  ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_title = title
    gen_header = None
##    gen_stubs_left = ('Model type:',
##                      'Date:',
##                      'Dependent Variable:',
##                      'df model'
##                  )
##    gen_data_left = [[modeltype],
##                     [date],
##                     yname, #What happens with multiple names?
##                     [df_model]
##                     ]
    # gen_fmt is not defined in this function; it has to come from the
    # enclosing module
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title = gen_title,
                                 txt_fmt = gen_fmt
                                 )

    # right half of the general table: stubs and data rows given separately
    gen_stubs_right = ('Method:',
                      'Time:',
                      'Number of Obs:',
                      'df resid'
                      )
    gen_data_right = ([modeltype], #was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid]
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                 gen_header,
                                 gen_stubs_right,
                                 title = gen_title,
                                 txt_fmt = gen_fmt
                                 )
    # attach the right table to the left one; the left table object is
    # then used as the combined general table
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    #Parameters part of the summary table
    #------------------------------------
    #Note: this is not necessary since we standardized names, only t versus normal
    # every entry calls self.t() while the dict literal is built, so t() is
    # evaluated six times although only one entry is looked up below
    tstats = {'OLS' : self.t(),
            'GLS' : self.t(),
            'GLSAR' : self.t(),
            'WLS' : self.t(),
            'RLM' : self.t(),
            'GLM' : self.t()
            }
    prob_stats = {'OLS' : self.pvalues,
                 'GLS' : self.pvalues,
                 'GLSAR' : self.pvalues,
                 'WLS' : self.pvalues,
                 'RLM' : self.pvalues,
                 'GLM' : self.pvalues
                }
    #Dictionary to store the header names for the parameter part of the
    #summary table. look up by modeltype
    # confidence level as a percentage string for the column header
    alp = str((1-alpha)*100)+'%'
    param_header = {
         'OLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'WLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLM'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution
         'RLM'   : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']  #check z
                   }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    # despite the name this is the index range over the regressors, not a
    # length. NOTE(review): xrange is a Python 2 builtin; confirm that the
    # module provides it (e.g. through a compat import) when run on Python 3
    exog_len = xrange(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # SimpleTable should be able to handle the formatting
    # one pre-formatted string per cell; zip combines the five columns
    # into one row per regressor
    params_data = zip(["%#6.4g" % (params[i]) for i in exog_len],
                       ["%#6.4f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in \
                                                             exog_len]
                      )
    # fmt_2 is not defined in this function; it has to come from the
    # enclosing module
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title = None,
                                  txt_fmt = fmt_2, #gen_fmt,
                                  )

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics


    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model

    def ols_printer():
        """
        return the summary for ols models as one string

        The general table and the parameter table are converted with str()
        and joined by a newline. Nothing is printed here.
        """
        table = str(general_table)+'\n'+str(parameter_table)
        return table

    def ols_to_csv():
        """
        exports ols summary data to csv

        Not implemented: the body is empty and the function is not
        registered in `printers` below.
        """
        pass
    def glm_printer():
        # same output as ols_printer
        table = str(general_table)+'\n'+str(parameter_table)
        return table
        # unreachable, the function has already returned
        pass

    # model class name -> function that renders the summary string
    printers  = {'OLS': ols_printer,
                'GLM' : glm_printer
                }

    # only 'print' is handled in the visible code; note that the default
    # value of `returns` is 'text'
    if returns=='print':
        try:
            return printers[modeltype]()
        except KeyError:
            # model classes without their own printer use the OLS layout
            return printers['OLS']()