Example #1
0
def generate_table(left_col, right_col, table_title):
    
    # Do not use column headers
    col_headers = None
    
    # Generate the right table
    if right_col:
        # Add padding
        if len(right_col) < len(left_col):
            right_col += [(' ', ' ')] * (len(left_col) - len(right_col))
        elif len(right_col) > len(left_col):
            left_col += [(' ', ' ')] * (len(right_col) - len(left_col))
        right_col = [('%-21s' % ('  '+k), v) for k,v in right_col]
        
        # Generate the right table
        gen_stubs_right, gen_data_right = zip_longest(*right_col)
        gen_table_right = SimpleTable(gen_data_right,
                                          col_headers,
                                          gen_stubs_right,
                                          title = table_title,
                                          txt_fmt = fmt_2cols)
    else:
        # If there is no right table set the right table to empty
        gen_table_right = []

    # Generate the left table  
    gen_stubs_left, gen_data_left = zip_longest(*left_col) 
    gen_table_left = SimpleTable(gen_data_left,
                                 col_headers,
                                 gen_stubs_left,
                                 title = table_title,
                                 txt_fmt = fmt_2cols)

    
    # Merge the left and right tables to make a single table
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table
Example #2
0
 def insert_header_row(self, rownum, headers, dec_below='header_dec_below'):
     """Return None.  Insert a row of headers,
     where ``headers`` is a sequence of strings.
     (The strings may contain newlines, to indicated multiline headers.)
     """
     header_rows = [header.split('\n') for header in headers]
     # rows in reverse order
     rows = list(zip_longest(*header_rows, **dict(fillvalue='')))
     rows.reverse()
     for i, row in enumerate(rows):
         self.insert(rownum, row, datatype='header')
         if i == 0:
             self[rownum].dec_below = dec_below
         else:
             self[rownum].dec_below = None
Example #3
0
 def insert_header_row(self, rownum, headers, dec_below='header_dec_below'):
     """Return None.  Insert a row of headers,
     where ``headers`` is a sequence of strings.
     (The strings may contain newlines, to indicated multiline headers.)
     """
     header_rows = [header.split('\n') for header in headers]
     # rows in reverse order
     rows = list(zip_longest(*header_rows, **dict(fillvalue='')))
     rows.reverse()
     for i, row in enumerate(rows):
         self.insert(rownum, row, datatype='header')
         if i == 0:
             self[rownum].dec_below = dec_below
         else:
             self[rownum].dec_below = None
Example #4
0
def summary(self,
            yname=None,
            xname=None,
            title=0,
            alpha=.05,
            returns='text',
            model_info=None):
    """
    Parameters
    ----------
    yname : string
            optional, Default is `Y`
    xname : list of strings
            optional, Default is `X.#` for # in p the number of regressors
    Confidance interval : (0,1) not implimented
    title : string
            optional, Defualt is 'Generalized linear model'
    returns : string
              'text', 'table', 'csv', 'latex', 'html'

    Returns
    -------
    Default :
    returns='print'
            Prints the summarirized results

    Option :
    returns='text'
            Prints the summarirized results

    Option :
    returns='table'
             SimpleTable instance : summarizing the fit of a linear model.

    Option :
    returns='csv'
            returns a string of csv of the results, to import into a spreadsheet

    Option :
    returns='latex'
    Not implimented yet

    Option :
    returns='HTML'
    Not implimented yet


    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    if title == 0:
        title = _model_types[self.model.__class__.__name__]

    if xname is not None and len(xname) != len(self.params):
        # GH 2298
        raise ValueError('User supplied xnames must have the same number of '
                         'entries as the number of model parameters '
                         '({0})'.format(len(self.params)))

    yname, xname = _getnames(self, yname, xname)

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    #replace definition of stubs data by single list
    #e.g.
    gen_left = [
        ('Model type:', [modeltype]),
        ('Date:', [date]),
        ('Dependent Variable:',
         yname),  # TODO: What happens with multiple names?
        ('df model', [df_model])
    ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  #transpose row col

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt)

    gen_stubs_right = ('Method:', 'Time:', 'Number of Obs:', 'df resid')
    gen_data_right = (
        [modeltype],  #was dist family need to look at more
        time_of_day,
        [nobs],
        [df_resid])
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    # Parameters part of the summary table
    # ------------------------------------
    # Note: this is not necessary since we standardized names,
    #  only t versus normal
    tstats = {
        'OLS': self.t(),
        'GLS': self.t(),
        'GLSAR': self.t(),
        'WLS': self.t(),
        'RLM': self.t(),
        'GLM': self.t()
    }
    prob_stats = {
        'OLS': self.pvalues,
        'GLS': self.pvalues,
        'GLSAR': self.pvalues,
        'WLS': self.pvalues,
        'RLM': self.pvalues,
        'GLM': self.pvalues
    }
    # Dictionary to store the header names for the parameter part of the
    # summary table. look up by modeltype
    alp = str((1 - alpha) * 100) + '%'
    param_header = {
        'OLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLSAR': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'WLS': ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
        'GLM': ['coef', 'std err', 't', 'P>|t|',
                alp + ' Conf. Interval'],  #glm uses t-distribution
        'RLM': ['coef', 'std err', 'z', 'P>|z|',
                alp + ' Conf. Interval']  #checke z
    }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = lrange(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # Simpletable should be able to handle the formating
    params_data = lzip(["%#6.4g" % (params[i]) for i in exog_len],
                       ["%#6.4f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len])
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2)

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model

    def ols_printer():
        """
        print summary table for ols models
        """
        table = str(general_table) + '\n' + str(parameter_table)
        return table

    def glm_printer():
        table = str(general_table) + '\n' + str(parameter_table)
        return table

    printers = {'OLS': ols_printer, 'GLM': glm_printer}

    if returns == 'print':
        try:
            return printers[modeltype]()
        except KeyError:
            return printers['OLS']()
Example #5
0
def summary_top(results,
                title=None,
                gleft=None,
                gright=None,
                yname=None,
                xname=None):
    '''generate top table(s)


    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    #change of names ?
    gen_left, gen_right = gleft, gright

    # time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # create dictionary with default
    # use lambdas because some values raise exception if they are not available
    default_items = dict([
        ('Dependent Variable:', lambda: [yname]),
        ('Dep. Variable:', lambda: [yname]),
        ('Model:', lambda: [results.model.__class__.__name__]),
        ('Date:', lambda: [date]),
        ('Time:', lambda: time_of_day),
        ('Number of Obs:', lambda: [results.nobs]),
        ('No. Observations:', lambda: [d_or_f(results.nobs)]),
        ('Df Model:', lambda: [d_or_f(results.df_model)]),
        ('Df Residuals:', lambda: [d_or_f(results.df_resid)]),
        ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]
         )  # doesn't exist for RLM - exception
    ])

    if title is None:
        title = results.model.__class__.__name__ + 'Regression Results'

    if gen_left is None:
        # default: General part of the summary table, Applicable to all? models
        gen_left = [('Dep. Variable:', None), ('Model type:', None),
                    ('Date:', None), ('No. Observations:', None),
                    ('Df model:', None), ('Df resid:', None)]

        try:
            llf = results.llf  # noqa: F841
            gen_left.append(('Log-Likelihood', None))
        except:  # AttributeError, NotImplementedError
            pass

        gen_right = []

    gen_title = title
    gen_header = None

    # replace missing (None) values with default values
    gen_left_ = []
    for item, value in gen_left:
        if value is None:
            value = default_items[item]()  # let KeyErrors raise exception
        gen_left_.append((item, value))
    gen_left = gen_left_

    if gen_right:
        gen_right_ = []
        for item, value in gen_right:
            if value is None:
                value = default_items[item]()  # let KeyErrors raise exception
            gen_right_.append((item, value))
        gen_right = gen_right_

    # check nothing was missed
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    # pad both tables to equal number of rows
    if gen_right:
        if len(gen_right) < len(gen_left):
            # fill up with blank lines to same length
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            # fill up with blank lines to same length, just to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(
            *gen_right)  #transpose row col
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  #because .extend_right seems works with []

    #moved below so that we can pad if needed to match length of gen_right
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  #transpose row col

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols)

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table
Example #6
0
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Parameters
    ----------
    yname : string
            optional, Default is `Y`
    xname : list of strings
            optional, Default is `X.#` for # in p the number of regressors
    Confidance interval : (0,1) not implimented
    title : string
            optional, Defualt is 'Generalized linear model'
    returns : string
              'text', 'table', 'csv', 'latex', 'html'

    Returns
    -------
    Default :
    returns='print'
            Prints the summarirized results

    Option :
    returns='text'
            Prints the summarirized results

    Option :
    returns='table'
             SimpleTable instance : summarizing the fit of a linear model.

    Option :
    returns='csv'
            returns a string of csv of the results, to import into a spreadsheet

    Option :
    returns='latex'
    Not implimented yet

    Option :
    returns='HTML'
    Not implimented yet


    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    conf_int calculated from normal dist.
    """
    if title == 0:
        title = _model_types[self.model.__class__.__name__]

    yname, xname = _getnames(self, yname, xname)

    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    modeltype = self.model.__class__.__name__
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    #General part of the summary table, Applicable to all? models
    #------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    #replace definition of stubs data by single list
    #e.g.
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                ('Dependent Variable:', yname),  # TODO: What happens with multiple names?
                ('df model', [df_model])
                ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt
                                 )

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype], #was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid]
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt
                                  )
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    # Parameters part of the summary table
    # ------------------------------------
    # Note: this is not necessary since we standardized names,
    #  only t versus normal
    tstats = {'OLS': self.t(),
              'GLS': self.t(),
              'GLSAR': self.t(),
              'WLS': self.t(),
              'RLM': self.t(),
              'GLM': self.t()}
    prob_stats = {'OLS': self.pvalues,
                  'GLS': self.pvalues,
                  'GLSAR': self.pvalues,
                  'WLS': self.pvalues,
                  'RLM': self.pvalues,
                  'GLM': self.pvalues
                  }
    # Dictionary to store the header names for the parameter part of the
    # summary table. look up by modeltype
    alp = str((1-alpha)*100)+'%'
    param_header = {
         'OLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'WLS'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'],
         'GLM'   : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution
         'RLM'   : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']  #checke z
                   }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = lrange(len(xname))
    tstat = tstats[modeltype]
    prob_stat = prob_stats[modeltype]

    # Simpletable should be able to handle the formating
    params_data = lzip(["%#6.4g" % (params[i]) for i in exog_len],
                       ["%#6.4f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len])
    parameter_table = SimpleTable(params_data,
                                  param_header[modeltype],
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2
                                  )

    #special table
    #-------------
    #TODO: exists in linear_model, what about other models
    #residual diagnostics

    #output options
    #--------------
    #TODO: JP the rest needs to be fixed, similar to summary in linear_model

    def ols_printer():
        """
        print summary table for ols models
        """
        table = str(general_table)+'\n'+str(parameter_table)
        return table

    def glm_printer():
        table = str(general_table)+'\n'+str(parameter_table)
        return table

    printers = {'OLS': ols_printer, 'GLM': glm_printer}

    if returns == 'print':
        try:
            return printers[modeltype]()
        except KeyError:
            return printers['OLS']()
Example #7
0
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None):
    '''generate top table(s)


    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?

    '''
    #change of names ?
    gen_left, gen_right = gleft, gright

    # time and names are always included
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # create dictionary with default
    # use lambdas because some values raise exception if they are not available
    default_items = dict([
          ('Dependent Variable:', lambda: [yname]),
          ('Dep. Variable:', lambda: [yname]),
          ('Model:', lambda: [results.model.__class__.__name__]),
          ('Date:', lambda: [date]),
          ('Time:', lambda: time_of_day),
          ('Number of Obs:', lambda: [results.nobs]),
          ('No. Observations:', lambda: [d_or_f(results.nobs)]),
          ('Df Model:', lambda: [d_or_f(results.df_model)]),
          ('Df Residuals:', lambda: [d_or_f(results.df_resid)]),
          ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf])  # doesn't exist for RLM - exception
    ])

    if title is None:
        title = results.model.__class__.__name__ + 'Regression Results'

    if gen_left is None:
        # default: General part of the summary table, Applicable to all? models
        gen_left = [('Dep. Variable:', None),
                    ('Model type:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df model:', None),
                    ('Df resid:', None)]

        try:
            llf = results.llf  # noqa: F841
            gen_left.append(('Log-Likelihood', None))
        except: # AttributeError, NotImplementedError
            pass

        gen_right = []

    gen_title = title
    gen_header = None

    # replace missing (None) values with default values
    gen_left_ = []
    for item, value in gen_left:
        if value is None:
            value = default_items[item]()  # let KeyErrors raise exception
        gen_left_.append((item, value))
    gen_left = gen_left_

    if gen_right:
        gen_right_ = []
        for item, value in gen_right:
            if value is None:
                value = default_items[item]()  # let KeyErrors raise exception
            gen_right_.append((item, value))
        gen_right = gen_right_

    # check nothing was missed
    missing_values = [k for k,v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    # pad both tables to equal number of rows
    if gen_right:
        if len(gen_right) < len(gen_left):
            # fill up with blank lines to same length
            gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right))
        elif len(gen_right) > len(gen_left):
            # fill up with blank lines to same length, just to keep it symmetric
            gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left))

        # padding in SimpleTable doesn't work like I want
        #force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  '+k), v) for k,v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right) #transpose row col
        gen_table_right = SimpleTable(gen_data_right,
                                      gen_header,
                                      gen_stubs_right,
                                      title=gen_title,
                                      txt_fmt=fmt_2cols
                                      )
    else:
        gen_table_right = []  #because .extend_right seems works with []

    #moved below so that we can pad if needed to match length of gen_right
    #transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col

    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=fmt_2cols
                                 )

    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table