Exemple #1
0
def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None):
    '''Generate the top (general information) table of a results summary.

    Parameters
    ----------
    results : results instance
        Source of the default values (``model``, ``nobs``, ``df_model``,
        ``df_resid``, ``llf``). Attributes are only read for the labels
        that actually request a default.
    title : str or None
        Table title. If None, the model class name followed by
        ``' Regression Results'`` is used.
    gleft, gright : list of (label, value) tuples or None
        Rows of the left and right half of the table. A ``value`` of None
        is replaced by the default registered for ``label``; an unknown
        label raises ``KeyError``. If ``gleft`` is None a default left
        column is built and ``gright`` is ignored (left empty). If only
        ``gright`` is None the right half is omitted.
    yname, xname : optional
        Passed through ``_getnames`` to obtain the variable names.

    Returns
    -------
    SimpleTable
        The left table, extended on the right by the right table when
        ``gright`` is non-empty.

    Notes
    -----
    TODO: this still uses predefined model_methods
    ? allow gleft, gright to be 1 element tuples instead of filling with None?
    '''
    # time and names are always included
    import time
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)

    yname, xname = _getnames(results, yname=yname, xname=xname)

    # Defaults are wrapped in lambdas because some values raise an exception
    # if they are not available; they are only evaluated when requested.
    default_items = {
        'Dependent Variable:': lambda: [yname],
        'Dep. Variable:': lambda: [yname],
        'Model:': lambda: [results.model.__class__.__name__],
        'Date:': lambda: [date],
        'Time:': lambda: time_of_day,
        'Number of Obs:': lambda: [results.nobs],
        'No. Observations:': lambda: [d_or_f(results.nobs)],
        'Df Model:': lambda: [d_or_f(results.df_model)],
        'Df Residuals:': lambda: [d_or_f(results.df_resid)],
        # doesn't exist for RLM - exception
        'Log-Likelihood:': lambda: ["%#8.5g" % results.llf],
        # no default for 'Method:'
    }

    if title is None:
        title = results.model.__class__.__name__ + ' Regression Results'

    if gleft is None:
        # default: general part of the summary table; every label used here
        # must be a key of default_items, otherwise the lookup below fails
        gen_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Date:', None),
                    ('No. Observations:', None),
                    ('Df Model:', None),
                    ('Df Residuals:', None)]

        # only show the log-likelihood if the results can provide it
        try:
            results.llf
        except Exception:  # e.g. AttributeError, NotImplementedError
            pass
        else:
            gen_left.append(('Log-Likelihood:', None))

        gen_right = []
    else:
        gen_left = gleft
        # a missing right column means "no right table"
        gen_right = [] if gright is None else gright

    def _fill_defaults(items):
        # replace missing (None) values with default values,
        # let KeyErrors raise for labels without a default
        return [(label, default_items[label]() if value is None else value)
                for label, value in items]

    gen_left = _fill_defaults(gen_left)
    gen_right = _fill_defaults(gen_right)

    # check
    missing_values = [k for k, v in gen_left + gen_right if v is None]
    assert missing_values == [], missing_values

    if gen_right:
        # pad both tables with blank lines to an equal number of rows
        n_diff = len(gen_left) - len(gen_right)
        if n_diff > 0:
            gen_right += [(' ', ' ')] * n_diff
        elif n_diff < 0:
            # just to keep it symmetric
            gen_left += [(' ', ' ')] * (-n_diff)

        # padding in SimpleTable doesn't work as wanted here:
        # force extra spacing and exact string length in right table
        gen_right = [('%-21s' % ('  ' + k), v) for k, v in gen_right]
        gen_stubs_right, gen_data_right = zip_longest(*gen_right)  # transpose
        gen_table_right = SimpleTable(gen_data_right,
                                      None,
                                      gen_stubs_right,
                                      title=title,
                                      txt_fmt=fmt_2cols)
    else:
        gen_table_right = []  # because .extend_right seems to work with []

    # done after the padding so that both halves have the same length;
    # transpose rows and columns, `unzip`
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)

    general_table = SimpleTable(gen_data_left,
                                None,
                                gen_stubs_left,
                                title=title,
                                txt_fmt=fmt_2cols)
    general_table.extend_right(gen_table_right)

    return general_table
Exemple #2
0
##    print 'llf:  ', res.llf
##    print 'R2    ', res.rsquared
##    print 'R2 adj', res.rsquared_adj
    # Accumulate squared prediction errors over the splits produced by
    # LeaveOneOut (presumably (train, test) index pairs - confirm against
    # the LeaveOneOut helper in use).
    prederr2 = 0.
    for inidx, outidx in LeaveOneOut(len(y0)):
        # refit OLS on the `inidx` rows only
        resl1o = sm.OLS(y0[inidx], fact_wconst[inidx,:]).fit()
        #print data.endog[outidx], res.model.predict(data.exog[outidx,:]),
        # squared error of the prediction for the `outidx` rows
        prederr2 += (y0[outidx] - resl1o.predict(fact_wconst[outidx,:]))**2.
    # one row per k: [k, AIC, BIC, adjusted R2, accumulated squared error]
    results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2])

# Turn the collected rows into an array; per the header list below the
# columns are k, AIC, BIC, R2_adj, L1O.
results = np.array(results)
print(results)
print('best result for k, by AIC, BIC, R2_adj, L1O')
# Row positions (not the k values themselves) of the best entry per column:
# minimum of columns 1-2, maximum of column 3, minimum of the last column.
print(np.r_[(np.argmin(results[:,1:3],0), np.argmax(results[:,3],0),
             np.argmin(results[:,-1],0))])

from statsmodels.iolib.table import SimpleTable

# Render the same array as a text table: integer format for the first
# column, three decimals for the remaining four.
headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
numformat = ['%6d'] + ['%10.3f']*4 #'%10.4f'
txt_fmt1 = dict(data_fmts = numformat)
tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1)

print("PCA regression on simulated data,")
print("DGP: 2 factors and 4 explanatory variables")
print(tabl)
print("Notes: k is number of components of PCA,")
print("       constant is added additionally")
print("       k=0 means regression on constant only")
print("       L1O: sum of squared prediction errors for leave-one-out")
Exemple #3
0
 def add_table(self, res, header, index, title):
     """Build a SimpleTable from the given arguments and append it to ``self.tables``."""
     self.tables.append(SimpleTable(res, header, index, title))
                title_align='r',
                header_align='r',
                data_aligns="r",
                stubs_align="l",
                fmt='txt')
# Module-level fixture: a 2x2 table mixing float and int cells, with one
# stub per row and one header per column.
cell0data = 0.0000
cell1data = 1
row0data = [cell0data, cell1data]
row1data = [2, 3.333]
table1data = [row0data, row1data]
test1stubs = ('stub1', 'stub2')
test1header = ('header1', 'header2')
#test1header = ('header1\nheader1a', 'header2\nheader2a')
# The three format dicts are expected to be defined earlier in the module.
tbl = SimpleTable(table1data,
                  test1header,
                  test1stubs,
                  txt_fmt=txt_fmt1,
                  ltx_fmt=ltx_fmt1,
                  html_fmt=html_fmt1)


def custom_labeller(cell):
    """Return ``'missing'`` for a cell whose data is NaN, otherwise None.

    The check is value-based rather than an identity test against
    ``np.nan``, so NaNs produced by computations or ``float('nan')`` are
    recognised as well. Non-float data (ints, strings, ...) is never
    treated as missing.
    """
    data = cell.data
    if isinstance(data, (float, np.floating)) and np.isnan(data):
        return 'missing'
    return None


class TestCell:
    """Tests built on the module-level cell fixtures."""

    def test_celldata(self):
        # Wrap each fixture value in a Cell, alternating datatype 0 and 1.
        celldata = (cell0data, cell1data, row1data[0], row1data[1])
        cells = []
        for position, datum in enumerate(celldata):
            cells.append(Cell(datum, datatype=position % 2))
Exemple #5
0
    def summary(self):
        """
        Build a printable summary of the fitted model.

        Returns
        -------
        smry : Summary instance
            Holds the header table and the parameter table; it can be
            printed or converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary
        """
        from statsmodels.iolib.summary import Summary
        from statsmodels.iolib.table import SimpleTable

        model = self.model
        title = model.__class__.__name__ + ' Model Results'

        # Name of the dependent variable: taken from pandas metadata when
        # available, otherwise the generic placeholder.
        endog = self.model.endog
        if isinstance(endog, pd.DataFrame):
            dep_variable = endog.columns[0]
        elif isinstance(endog, pd.Series):
            dep_variable = endog.name
        else:
            dep_variable = 'endog'

        seasonal_periods = None
        if self.model.seasonal is not None:
            seasonal_periods = self.model.seasonal_periods

        # Display names for the trend / seasonal component codes.
        lookup = {
            'add': 'Additive',
            'additive': 'Additive',
            'mul': 'Multiplicative',
            'multiplicative': 'Multiplicative',
            None: 'None'
        }

        transform = self.params['use_boxcox']
        box_cox_transform = bool(transform)
        # A string setting is shown verbatim; otherwise show the stored value.
        if isinstance(transform, string_types):
            box_cox_coeff = transform
        else:
            box_cox_coeff = self.params['lamda']
        if isinstance(box_cox_coeff, float):
            box_cox_coeff = '{:>10.5f}'.format(box_cox_coeff)

        top_left = [
            ('Dep. Variable:', [dep_variable]),
            ('Model:', [model.__class__.__name__]),
            ('Optimized:', [str(np.any(self.optimized))]),
            ('Trend:', [lookup[self.model.trend]]),
            ('Seasonal:', [lookup[self.model.seasonal]]),
            ('Seasonal Periods:', [str(seasonal_periods)]),
            ('Box-Cox:', [str(box_cox_transform)]),
            ('Box-Cox Coeff.:', [str(box_cox_coeff)]),
        ]

        # None values are left for add_table_2cols to fill in.
        top_right = [
            ('No. Observations:', [str(len(self.model.endog))]),
            ('SSE', ['{:5.3f}'.format(self.sse)]),
            ('AIC', ['{:5.3f}'.format(self.aic)]),
            ('BIC', ['{:5.3f}'.format(self.bic)]),
            ('AICC', ['{:5.3f}'.format(self.aicc)]),
            ('Date:', None),
            ('Time:', None),
        ]

        smry = Summary()
        smry.add_table_2cols(self,
                             gleft=top_left,
                             gright=top_right,
                             title=title)
        formatted = self.params_formatted  # type: pd.DataFrame

        def _format_coeff(value):
            # Fixed-point with up to 7 decimals for moderate magnitudes,
            # general format for very large or very small numbers.
            magnitude = np.abs(value)
            exponent = int(np.log10(magnitude)) if magnitude != 0 else 1
            if not -3 <= exponent <= 4:
                return '{:>20.5g}'.format(value)
            decimals = min(7 - exponent, 7)
            return '{{:>20.{0}f}}'.format(decimals).format(value)

        # Per row: column 1 -> coeff, column 0 -> code, column 2 -> optimized.
        rows = [
            [
                _format_coeff(vals.iloc[1]),
                '{0:>20}'.format(vals.iloc[0]),
                '{0:>20}'.format(str(bool(vals.iloc[2]))),
            ]
            for _, vals in formatted.iterrows()
        ]
        params_table = SimpleTable(rows,
                                   headers=['coeff', 'code', 'optimized'],
                                   title="",
                                   stubs=list(formatted.index))

        smry.tables.append(params_table)

        return smry
Exemple #6
0
def summary_table(title=None,
                  dep_var='',
                  model_name='',
                  method='',
                  date='',
                  time='',
                  aic=None,
                  bic=None,
                  num_obs=None,
                  df_resid=None,
                  df_model=None,
                  rho_squared=None,
                  rho_bar_squared=None,
                  log_likelihood=None,
                  null_log_likelihood=None,
                  x_names=None,
                  coefs=None,
                  std_errs=None,
                  t_scores=None,
                  alpha=None):
    """
    Generate a summary table of estimation results using Statsmodels SimpleTable. Still a
    work in progress.

    SimpleTable is maddening to work with, so it would be nice to find an alternative. It
    would need to support pretty-printing of formatted tables to plaintext and ideally
    also to HTML and Latex.

    At first it looked like we could use Statsmodels's summary table generator directly
    (iolib.summary.Summary), but this requires a Statsmodels results object as input and
    doesn't document which properties are pulled from it. PyLogit reverse engineered this
    for use in get_statsmodels_summary() -- so it's possible, but could be hard to
    maintain in the long run.

    We can't use PyLogit's summary table generator either. It requires a PyLogit
    model class as input, and we can't create one from results parameters. Oh well!

    Parameters
    ----------
    title : str, optional
        Table title; defaults to "CHOICEMODELS ESTIMATION RESULTS".
    dep_var, model_name, method, date, time : str, optional
        Text shown as-is in the left half of the header.
    aic, bic, num_obs, df_resid, df_model, rho_squared, rho_bar_squared,
    log_likelihood, null_log_likelihood : numeric, optional
        Header statistics; a value of None is rendered as an empty cell.
    x_names : sequence of str, optional
        Row labels of the coefficient table; one row is produced per name.
    coefs, std_errs, t_scores : sequence of numeric, optional
        Values for the coefficient table, aligned with ``x_names``. Entries
        that are None, or missing because the sequence is shorter than
        ``x_names``, are rendered as empty cells.
    alpha : optional
        Currently unused.

    Returns
    -------
    (header, body) : tuple of SimpleTable
        The general-information table and the coefficient table.

    """
    def fmt(value, format_str):
        # Custom numeric->string formatter that gracefully accepts null values
        return '' if value is None else format_str.format(value)

    def entry(values, i):
        # Value at position i, or None when the sequence is too short
        return values[i] if i < len(values) else None

    # None defaults avoid sharing mutable default lists between calls
    x_names = [] if x_names is None else x_names
    coefs = [] if coefs is None else coefs
    std_errs = [] if std_errs is None else std_errs
    t_scores = [] if t_scores is None else t_scores

    if title is None:
        title = "CHOICEMODELS ESTIMATION RESULTS"

    top_left = [['Dep. Var.:', dep_var],
                ['Model:', model_name],
                ['Method:', method],
                ['Date:', date],
                ['Time:', time],
                ['AIC:', fmt(aic, "{:,.3f}")],
                ['BIC:', fmt(bic, "{:,.3f}")]]

    top_right = [['No. Observations:', fmt(num_obs, "{:,}")],
                 ['Df Residuals:', fmt(df_resid, "{:,}")],
                 ['Df Model:', fmt(df_model, "{:,}")],
                 ['Pseudo R-squ.:', fmt(rho_squared, "{:.3f}")],
                 ['Pseudo R-bar-squ.:', fmt(rho_bar_squared, "{:.3f}")],
                 ['Log-Likelihood:', fmt(log_likelihood, "{:,.3f}")],
                 ['LL-Null:', fmt(null_log_likelihood, "{:,.3f}")]]

    # Zip into a single table (each side needs same number of entries)
    header_cells = [left + right for left, right in zip(top_left, top_right)]

    # See end of statsmodels.iolib.table.py for formatting options
    header_fmt = dict(table_dec_below='',
                      data_aligns='lrlr',
                      colwidths=11,
                      colsep='   ',
                      empty_cell='')

    header = SimpleTable(header_cells, title=title, txt_fmt=header_fmt)

    col_labels = ['coef', 'std err', 'z', 'P>|z|', 'Conf. Int.']
    row_labels = x_names

    body_cells = [
        [
            fmt(entry(coefs, i), "{:,.4f}"),
            fmt(entry(std_errs, i), "{:,.3f}"),
            fmt(entry(t_scores, i), "{:,.3f}"),
            '',  # p-value placeholder
            '',  # conf int placeholder
        ]
        for i in range(len(x_names))
    ]

    body_fmt = dict(table_dec_below='=',
                    header_align='r',
                    data_aligns='r',
                    colwidths=7,
                    colsep='   ')

    body = SimpleTable(body_cells,
                       headers=col_labels,
                       stubs=row_labels,
                       txt_fmt=body_fmt)

    # Ideally we'd want to append these into a single table, but I can't get it to work
    # without completely messing up the formatting..

    return (header, body)
Exemple #7
0
    def summary(self):
        """
        Build a summary of a model whose parameters were supplied by the user.

        Returns
        -------
        summary : Summary instance
            Object holding the tables, with export to text, html or latex
        """
        # Layout of the summary:
        #   1. Overall information (two-column header)
        #   2. Mean parameters
        #   3. Volatility parameters
        #   4. Distribution parameters
        #   5. Notes

        model = self.model
        model_name = model.name + ' - ' + model.volatility.name

        # Header, left half
        top_left = [
            ('Dep. Variable:', self._dep_name),
            ('Mean Model:', model.name),
            ('Vol Model:', model.volatility.name),
            ('Distribution:', model.distribution.name),
            ('Method:', 'User-specified Parameters'),
            ('', ''),
            ('Date:', self._datetime.strftime('%a, %b %d %Y')),
            ('Time:', self._datetime.strftime('%H:%M:%S')),
        ]

        # Header, right half (blank rows keep both halves the same length)
        top_right = [
            ('R-squared:', '--'),
            ('Adj. R-squared:', '--'),
            ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
            ('AIC:', '%#10.6g' % self.aic),
            ('BIC:', '%#10.6g' % self.bic),
            ('No. Observations:', self._nobs),
            ('', ''),
            ('', ''),
        ]

        title = model_name + ' Model Results'
        left_stubs = [label for label, _ in top_left]
        left_vals = [[value] for _, value in top_left]
        table = SimpleTable(left_vals,
                            txt_fmt=fmt_2cols,
                            title=title,
                            stubs=left_stubs)

        # create summary table instance
        smry = Summary()

        # NOTE: this writes into the shared fmt_2cols dict itself (no copy
        # is made), so the change is visible to every user of fmt_2cols.
        fmt_2cols['data_fmts'][1] = '%18s'

        # Pad the right-hand labels to a fixed width before attaching them
        right_stubs = ['%-21s' % ('  ' + label) for label, _ in top_right]
        right_vals = [[value] for _, value in top_right]
        table.extend_right(SimpleTable(right_vals, stubs=right_stubs))
        smry.tables.append(table)

        # Parameter rows: a single 'coef' column, float64 entries are
        # rendered through format_float_fixed, anything else is kept as is.
        stubs = self._names
        header = ['coef']
        params = self.params
        param_table_data = []
        for pos in range(len(params)):
            entry = params[pos]
            if isinstance(entry, np.float64):
                entry = format_float_fixed(entry, 10, 4)
            param_table_data.append([entry])

        # Split the rows into one table per non-empty model component
        sections = (
            ('Mean Model', self.model.num_params),
            ('Volatility Model', self.model.volatility.num_params),
            ('Distribution', self.model.distribution.num_params),
        )
        start = 0
        for section_title, count in sections:
            if count == 0:
                continue

            stop = start + count
            section = SimpleTable(param_table_data[start:stop],
                                  stubs=stubs[start:stop],
                                  txt_fmt=fmt_params,
                                  headers=header,
                                  title=section_title)
            start = stop
            smry.tables.append(section)

        extra_text = ('Results generated with user-specified parameters.',
                      'Since the model was not estimated, there are no std. '
                      'errors.')
        smry.add_extra_txt(extra_text)
        return smry
 def summary(self,evaluator):
     """Return the data from ``summary_data(evaluator)`` as a titled SimpleTable."""
     cone_data = self.summary_data(evaluator)
     col_labels = ['30','60','90','120']
     row_labels = ['max','75%','median','25%','min']
     return SimpleTable(cone_data, col_labels, row_labels, title="Volatility Cone")
Exemple #9
0
    def summary_impacts(self,
                        impact_date=None,
                        impacted_variable=None,
                        groupby='impact date',
                        show_revisions=None,
                        sparsify=True,
                        float_format='%.2f'):
        """
        Create summary table with detailed impacts from news; by date, variable

        Parameters
        ----------
        impact_date : int, str, datetime, list, array, or slice, optional
            Observation index label or slice of labels specifying particular
            impact periods to display. The impact date(s) describe the periods
            in which impacted variables were *affected* by the news. If this
            argument is given, the output table will only show this impact date
            or dates. Note that this argument is passed to the Pandas `loc`
            accessor, and so it should correspond to the labels of the model's
            index. If the model was created with data in a list or numpy array,
            then these labels will be zero-indexed observation integers.
        impacted_variable : str, list, array, or slice, optional
            Observation variable label or slice of labels specifying particular
            impacted variables to display. The impacted variable(s) describe
            the variables that were *affected* by the news. If you do not know
            the labels for the variables, check the `endog_names` attribute of
            the model instance.
        groupby : {'impact date', 'impacted variable'}
            The primary variable for grouping results in the impacts table.
            Matching is case-insensitive and the underscore spellings
            ('impact_date', 'impacted_variable') are accepted as well. The
            default is to group by impact date.
        show_revisions : bool, optional
            If set to False, the impacts table will not show the impacts from
            data revisions or the total impacts. Default is to show the
            revisions and totals columns if any revisions were made and
            otherwise to hide them.
        sparsify : bool, optional, default True
            Set to False for the table to include every one of the multiindex
            keys at each row.
        float_format : str, optional
            Formatter format string syntax for converting numbers to strings.
            Default is '%.2f'.

        Returns
        -------
        impacts_table : SimpleTable
            Table describing total impacts from both revisions and news. See
            the documentation for the `impacts` attribute for more details
            about the index and columns.

        Raises
        ------
        ValueError
            If `groupby` is not one of the accepted options.

        See Also
        --------
        impacts
        """
        # Squeeze for univariate models
        if impacted_variable is None and self.updated.model.k_endog == 1:
            impacted_variable = self.updated.model.endog_names

        # Default is to only show the revisions columns if there were any
        # revisions (otherwise it would just be a column of zeros)
        if show_revisions is None:
            show_revisions = len(self.revisions_iloc) > 0

        # Select only the variables / dates of interest
        s = list(np.s_[:, :])
        if impact_date is not None:
            s[0] = np.s_[impact_date]
        if impacted_variable is not None:
            s[1] = np.s_[impacted_variable]
        s = tuple(s)
        impacts = self.impacts.loc[s, :]

        # Make the first index level the groupby level
        groupby = groupby.lower()
        if groupby in ['impacted variable', 'impacted_variable']:
            impacts.index = impacts.index.swaplevel(1, 0)
        elif groupby not in ['impact date', 'impact_date']:
            raise ValueError('Invalid groupby for impacts table. Valid options'
                             ' are "impact date" or "impacted variable".'
                             f' Got "{groupby}".')
        impacts = impacts.sort_index()

        # Drop the non-groupby level if there's only one value
        k_vars = len(impacts.index.remove_unused_levels().levels[1])
        removed_level = None
        if sparsify and k_vars == 1:
            name = impacts.index.names[1]
            value = impacts.index.levels[1][0]
            removed_level = f'{name} = {value}'
            impacts.index = impacts.index.droplevel(1)
            impacts = impacts.applymap(
                lambda num: '' if pd.isnull(num) else float_format % num)
            impacts = impacts.reset_index()
            impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str)
        else:
            impacts = impacts.reset_index()
            impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str)
            impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap(
                lambda num: '' if pd.isnull(num) else float_format % num)

        # Sparsify the groupby column: blank out repeats of the previous row.
        # The spelling given by the caller is tried first; if no such column
        # exists, fall back to the space-separated spelling so that e.g.
        # 'impact_date' is not silently left unsparsified.
        if sparsify:
            column = groupby
            if column not in impacts:
                column = groupby.replace('_', ' ')
            if column in impacts:
                mask = impacts[column] == impacts[column].shift(1)
                impacts.loc[mask, column] = ''

        # Drop revisions and totals columns if applicable
        if not show_revisions:
            impacts.drop(['impact of revisions', 'total impact'],
                         axis=1,
                         inplace=True)

        params_data = impacts.values
        params_header = impacts.columns.tolist()
        params_stubs = None

        title = 'Impacts'
        if removed_level is not None:
            # NOTE(review): `groupby` can only be one of the validated
            # options above, none of which equals 'date', so `join` is
            # always 'for' - confirm the intended wording before changing it.
            join = 'on' if groupby == 'date' else 'for'
            title += f' {join} [{removed_level}]'
        impacts_table = SimpleTable(params_data,
                                    params_header,
                                    params_stubs,
                                    txt_fmt=fmt_params,
                                    title=title)

        return impacts_table
Exemple #10
0
# Estimate an OLS model for comparison
res_ols = sm.OLS(y, X).fit()

# Compare the estimated parameters in WLS and OLS
print(res_ols.params)
print(res_wls.params)

# Compare the WLS standard errors to heteroscedasticity corrected OLS standard
# errors. The row labels must follow the order in which the standard errors
# are stacked: WLS, OLS, then HC0 through HC3.
se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se],
                [res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])
se = np.round(se, 4)
colnames = ['x1', 'const']
rownames = ['WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC2', 'OLS_HC3']
tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
print(tabl)

# Calculate OLS prediction interval: residual variance plus the variance of
# the fitted mean for each row of X, with the 97.5% t quantile as multiplier
covb = res_ols.cov_params()
prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)
prediction_std = np.sqrt(prediction_var)
tppf = stats.t.ppf(0.975, res_ols.df_resid)

# Draw a plot to compare predicted values in WLS and OLS:
prstd, iv_l, iv_u = wls_prediction_std(res_wls)
plt.figure()
plt.plot(x, y, 'o', x, y_true, 'b-')
plt.plot(x, res_ols.fittedvalues, 'r--')
plt.plot(x, res_ols.fittedvalues + tppf * prediction_std, 'r--')
plt.plot(x, res_ols.fittedvalues - tppf * prediction_std, 'r--')
Exemple #11
0
def summary_params_2d(result,
                      extras=None,
                      endog_names=None,
                      exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list[str]
        additional attributes to add below a parameter row, e.g. bse or tvalues
    endog_names : {list[str], None}
        names for rows of the parameter array (multivariate endog)
    exog_names : {list[str], None}
        names for columns of the parameter array (exog); if None, one
        generic ``var<i>`` name is generated per column
    title : None or string

    Returns
    -------
    table : SimpleTable
        a single table with one row per row of the parameter array, each
        followed by one row per entry in ``extras`` (shown in parentheses)

    '''
    if endog_names is None:
        # TODO: note the [1:] is specific to current MNLogit
        endog_names = [
            'endog_%d' % i for i in np.unique(result.model.endog)[1:]
        ]
    if exog_names is None:
        # headers label the columns of params, so there must be one name
        # per column (the same count that is used for data_fmts below)
        exog_names = ['var%d' % i for i in range(result.params.shape[1])]

    # TODO: check formatting options with different values
    res_params = [[forg(item, prec=4) for item in row]
                  for row in result.params]
    if extras:
        # one formatted 2d list per extra attribute, values in parentheses
        extras_list = [[[
            '%10s' % ('(' + forg(v, prec=3).strip() + ')') for v in col
        ] for col in getattr(result, what)] for what in extras]
        # interleave: params row, then the matching row of each extra
        data = lzip(res_params, *extras_list)
        data = [i for j in data for i in j]  #flatten
        # rows holding extras get an empty stub
        stubs = lzip(endog_names, *[[''] * len(endog_names)] * len(extras))
        stubs = [i for j in stubs for i in j]  #flatten
    else:
        data = res_params
        stubs = endog_names

    # work on a copy so that the shared fmt_params is not modified
    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt["data_fmts"] = ["%s"] * result.params.shape[1]

    return SimpleTable(data,
                       headers=exog_names,
                       stubs=stubs,
                       title=title,
                       txt_fmt=txt_fmt)
Exemple #12
0
    def test_SimpleTable_4(self):
        # Basic test, test_SimpleTable_4 test uses custom txt_fmt
        # Builds a 2x2 table with a custom text format and copies of the
        # default LaTeX and HTML formats.
        txt_fmt1 = dict(data_fmts=['%3.2f', '%d'],
                        empty_cell=' ',
                        colwidths=1,
                        colsep=' * ',
                        row_pre='* ',
                        row_post=' *',
                        table_dec_above='*',
                        table_dec_below='*',
                        header_dec_below='*',
                        header_fmt='%s',
                        stub_fmt='%s',
                        title_align='r',
                        header_align='r',
                        data_aligns="r",
                        stubs_align="l",
                        fmt='txt')
        ltx_fmt1 = default_latex_fmt.copy()
        html_fmt1 = default_html_fmt.copy()
        cell0data = 0.0000
        cell1data = 1
        row0data = [cell0data, cell1data]
        row1data = [2, 3.333]
        table1data = [row0data, row1data]
        test1stubs = ('stub1', 'stub2')
        test1header = ('header1', 'header2')
        tbl = SimpleTable(table1data,
                          test1header,
                          test1stubs,
                          txt_fmt=txt_fmt1,
                          ltx_fmt=ltx_fmt1,
                          html_fmt=html_fmt1)

        # NOTE(review): the functions below are defined inside this test
        # method and are never called here, so their assertions do not run
        # when test_SimpleTable_4 executes.
        def test_txt_fmt1(self):
            # Limited test of custom txt_fmt
            desired = """
*****************************
*       * header1 * header2 *
*****************************
* stub1 *    0.00 *       1 *
* stub2 *    2.00 *       3 *
*****************************
"""
            actual = '\n%s\n' % tbl.as_text()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

            # NOTE(review): test_ltx_fmt1 is nested one level deeper and only
            # defines its own `desired`; it contains no assertion.
            def test_ltx_fmt1(self):
                # Limited test of custom ltx_fmt
                desired = r"""
\begin{tabular}{lcc}
\toprule
                        & \textbf{header1} & \textbf{header2}  \\
\midrule
\textbf{stub1} &       0.0        &        1          \\
\textbf{stub2} &        2         &      3.333        \\
\bottomrule
\end{tabular}
"""

            # NOTE(review): these lines belong to test_txt_fmt1's body, so
            # `desired` is still the text-format string from above while
            # `actual` is the LaTeX output.
            actual = '\n%s\n' % tbl.as_latex_tabular()
            #print(actual)
            #print(desired)
            self.assertEqual(actual, desired)

        def test_html_fmt1(self):
            # Limited test of custom html_fmt
            desired = """
<table class="simpletable">
<tr>
    <td></td>    <th>header1</th> <th>header2</th>
</tr>
<tr>
  <th>stub1</th>   <td>0.0</td>      <td>1</td>
</tr>
<tr>
  <th>stub2</th>    <td>2</td>     <td>3.333</td>
</tr>
</table>
"""
            actual = '\n%s\n' % tbl.as_html()
            self.assertEqual(actual, desired)
Exemple #13
0
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False):
    '''Create a summary table for the parameters.

    Parameters
    ----------
    results : results instance or tuple
        If a results instance, ``params``, ``bse``, ``tvalues``, ``pvalues``
        and ``conf_int(alpha)`` are read from it.  If a tuple (used for
        multivariate endog), it must unpack as
        ``(results, params, std_err, tvalues, pvalues, conf_int)``; in that
        case ``alpha`` is only used for the header label.
    yname : string or None
        optional name for the endogenous variable; it is only forwarded to
        ``_getnames`` and the returned y name is not used in this table
    xname : list of strings or None
        optional names for the exogenous variables, resolved through
        ``_getnames``; the result is used as the row stubs
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        if True the statistic columns are labelled ``t`` / ``P>|t|``,
        otherwise ``z`` / ``P>|z|``
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.

    Returns
    -------
    params_table : SimpleTable instance
    '''

    if isinstance(results, tuple):
        # for multivariate endog
        # TODO: check whether I don't want to refactor this
        # we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results
    else:
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  # is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)

    # Header names for the parameter part of the summary table.
    alp = str((1-alpha)*100)+'%'
    if use_t:
        param_header = ['coef', 'std err', 't', 'P>|t|',
                        '[' + alp + ' Conf. Int.]']
    else:
        param_header = ['coef', 'std err', 'z', 'P>|z|',
                        '[' + alp + ' Conf. Int.]']

    if skip_header:
        param_header = None

    _, xname = _getnames(results, yname=yname, xname=xname)

    params_stubs = xname

    # ``range`` (not the Python 2-only ``xrange``) can be iterated repeatedly
    # on both Python 2 and Python 3.
    exog_idx = range(len(xname))

    # One "lower upper" string per parameter.
    confint = ["%s %s" % tuple(map(forg, conf_int[i])) for i in exog_idx]

    # Materialise the lengths: on Python 3 ``map`` returns a one-shot
    # iterator, so ``max`` would exhaust it and ``min`` would then fail.
    len_ci = [len(ci) for ci in confint]
    max_ci = max(len_ci)
    min_ci = min(len_ci)

    # center confidence intervals if they are unequal lengths
    if min_ci < max_ci:
        confint = [ci.center(max_ci) for ci in confint]

    # explicit f/g formatting via forg (f or g depending on values);
    # ``list`` makes the rows a concrete sequence on Python 3 as well.
    params_data = list(zip([forg(params[i], prec=4) for i in exog_idx],
                           [forg(std_err[i]) for i in exog_idx],
                           [forg(tvalues[i]) for i in exog_idx],
                           ["%#6.3f" % (pvalues[i]) for i in exog_idx],
                           confint))
    parameter_table = SimpleTable(params_data,
                                  param_header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_params)

    return parameter_table
Exemple #14
0
def summary_table(res, alpha=0.05):
    """
    Generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
       fitted results; it is passed to ``OLSInfluence`` and
       ``wls_prediction_std``, and ``mse_resid``, ``df_resid``,
       ``fittedvalues``, ``resid``, ``nobs`` and ``model.endog`` are read
       from it
    alpha : float
       significance level for confidence interval

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations).
       The interval columns are labelled with the level implied by
       ``alpha`` (``95%`` for the default ``alpha=0.05``).
    """

    from scipy import stats
    from statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = OLSInfluence(res)

    # standard error for predicted mean
    # Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag*res.mse_resid)

    # two-sided t critical value for the requested level
    tppf = stats.t.isf(alpha/2., res.df_resid)
    predict_mean_ci = np.column_stack([
                        res.fittedvalues - tppf * predict_mean_se,
                        res.fittedvalues + tppf * predict_mean_se])

    # standard error for predicted observation
    tmp = wls_prediction_std(res, alpha=alpha)
    predict_se, predict_ci_low, predict_ci_upp = tmp

    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    # standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    # one row per observation, one column per entry of ``ss2`` below
    data = np.column_stack([
                            np.arange(res.nobs) + 1,
                            res.model.endog,
                            res.fittedvalues,
                            predict_mean_se,
                            predict_mean_ci[:, 0],
                            predict_mean_ci[:, 1],
                            predict_ci[:, 0],
                            predict_ci[:, 1],
                            res.resid,
                            resid_se,
                            infl.resid_studentized_internal,
                            infl.cooks_distance[0]
                            ])

    # Label the interval columns with the level actually used instead of a
    # fixed "95%"; '%g' renders the default alpha=0.05 as exactly "95%".
    ci_label = '%g%%' % ((1 - alpha) * 100)
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict',
           'Mean ci\n' + ci_label + ' low',
           'Mean ci\n' + ci_label + ' upp',
           'Predict ci\n' + ci_label + ' low',
           'Predict ci\n' + ci_label + ' upp',
           'Residual', 'Std Error\nResidual', 'Student\nResidual',
           "Cook's\nD"]

    from statsmodels.iolib.table import SimpleTable, default_html_fmt
    from statsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy
    # deep copies so the shared default formats are not modified
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    # integer format for the observation index, fixed 3 decimals elsewhere
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=ss2, txt_fmt=fmt,
                     html_fmt=fmt_html)

    return st, data, ss2
Exemple #15
0
def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
                   skip_header=False, title=None):
    '''Build the parameter table of a summary.

    Parameters
    ----------
    results : results instance or tuple
        Either a results instance, from which ``params``, ``bse``,
        ``tvalues``, ``pvalues`` and ``conf_int(alpha)`` are taken, or a
        tuple ``(results, params, std_err, tvalues, pvalues, conf_int)``
        (used for multivariate endog).
    yname : string or None
        optional name for the endogenous variable; forwarded to
        ``_getnames``
    xname : list of strings or None
        optional names for the exogenous variables; forwarded to
        ``_getnames``, whose result supplies the row stubs
    alpha : float
        significance level for the confidence intervals
    use_t : bool
        selects the ``t`` / ``P>|t|`` column labels (True) or the
        ``z`` / ``P>|z|`` labels (False)
    skip_header : bool
        If false (default), then the header row is added. If true, then no
        header row is added.
    title : string or None
        passed through as the title of the table

    Returns
    -------
    params_table : SimpleTable instance

    Raises
    ------
    ValueError
        if the number of names differs from the number of parameters
    '''

    if not isinstance(results, tuple):
        params = results.params
        std_err = results.bse
        tvalues = results.tvalues  # is this sometimes called zvalues
        pvalues = results.pvalues
        conf_int = results.conf_int(alpha)
    else:
        # for multivariate endog
        # TODO: check whether I don't want to refactor this
        # we need to give parameter alpha to conf_int
        results, params, std_err, tvalues, pvalues, conf_int = results

    # Only the two statistic labels depend on use_t; the confidence bound
    # labels are shared by both variants.
    stat_labels = ['t', 'P>|t|'] if use_t else ['z', 'P>|z|']
    bound_labels = ['[' + str(alpha/2), str(1-alpha/2) + ']']
    param_header = ['coef', 'std err'] + stat_labels + bound_labels
    if skip_header:
        param_header = None

    _, xname = _getnames(results, yname=yname, xname=xname)

    n_names = len(xname)
    if n_names != len(params):
        raise ValueError('xnames and params do not have the same length')

    row_idx = lrange(n_names)

    # Format column by column, then transpose into rows.
    columns = (
        [forg(params[i], prec=4) for i in row_idx],
        [forg(std_err[i]) for i in row_idx],
        [forg(tvalues[i]) for i in row_idx],
        ["%#6.3f" % (pvalues[i]) for i in row_idx],
        [forg(conf_int[i, 0]) for i in row_idx],
        [forg(conf_int[i, 1]) for i in row_idx],
    )
    params_data = lzip(*columns)

    return SimpleTable(params_data,
                       param_header,
                       xname,
                       title=title,
                       txt_fmt=fmt_params)
Exemple #16
0
    def summary(self):
        """
        Constructs a summary of the results from a fit model.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitated export to text, html or
            latex
        """
        # Summary layout
        # 1. Overall information
        # 2. Mean parameters
        # 3. Volatility parameters
        # 4. Distribution parameters
        # 5. Notes

        model = self.model
        model_name = model.name + ' - ' + model.volatility.name

        # Summary Header
        top_left = [('Dep. Variable:', self._dep_name),
                    ('Mean Model:', model.name),
                    ('Vol Model:', model.volatility.name),
                    ('Distribution:', model.distribution.name),
                    ('Method:', 'Maximum Likelihood'), ('', ''),
                    ('Date:', self._datetime.strftime('%a, %b %d %Y')),
                    ('Time:', self._datetime.strftime('%H:%M:%S'))]

        top_right = [('R-squared:', '%#8.3f' % self.rsquared),
                     ('Adj. R-squared:', '%#8.3f' % self.rsquared_adj),
                     ('Log-Likelihood:', '%#10.6g' % self.loglikelihood),
                     ('AIC:', '%#10.6g' % self.aic),
                     ('BIC:', '%#10.6g' % self.bic),
                     ('No. Observations:', self._nobs),
                     ('Df Residuals:', self.nobs - self.num_params),
                     ('Df Model:', self.num_params)]

        title = model_name + ' Model Results'
        stubs = []
        vals = []
        for stub, val in top_left:
            stubs.append(stub)
            vals.append([val])
        table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs)

        # create summary table instance
        smry = Summary()
        # Top Table
        # Parameter table
        fmt = fmt_2cols
        fmt['data_fmts'][1] = '%18s'

        top_right = [('%-21s' % ('  ' + k), v) for k, v in top_right]
        stubs = []
        vals = []
        for stub, val in top_right:
            stubs.append(stub)
            vals.append([val])
        table.extend_right(SimpleTable(vals, stubs=stubs))
        smry.tables.append(table)

        conf_int = np.asarray(self.conf_int())
        conf_int_str = []
        for c in conf_int:
            conf_int_str.append('[' + format_float_fixed(c[0], 7, 3) + ',' +
                                format_float_fixed(c[1], 7, 3) + ']')

        stubs = self._names
        header = ['coef', 'std err', 't', 'P>|t|', '95.0% Conf. Int.']
        vals = (self.params, self.std_err, self.tvalues, self.pvalues,
                conf_int_str)
        formats = [(10, 4), (9, 3), (9, 3), (9, 3), None]
        pos = 0
        param_table_data = []
        for _ in range(len(vals[0])):
            row = []
            for i, val in enumerate(vals):
                if isinstance(val[pos], np.float64):
                    converted = format_float_fixed(val[pos], *formats[i])
                else:
                    converted = val[pos]
                row.append(converted)
            pos += 1
            param_table_data.append(row)

        mc = self.model.num_params
        vc = self.model.volatility.num_params
        dc = self.model.distribution.num_params
        counts = (mc, vc, dc)
        titles = ('Mean Model', 'Volatility Model', 'Distribution')
        total = 0
        for title, count in zip(titles, counts):
            if count == 0:
                continue

            table_data = param_table_data[total:total + count]
            table_stubs = stubs[total:total + count]
            total += count
            table = SimpleTable(table_data,
                                stubs=table_stubs,
                                txt_fmt=fmt_params,
                                headers=header,
                                title=title)
            smry.tables.append(table)

        extra_text = ['Covariance estimator: ' + self.cov_type]

        if self.convergence_flag:
            extra_text.append("""
WARNING: The optimizer did not indicate sucessful convergence. The message was
{string_message}. See convergence_flag.""".format(
                string_message=self._optim_output[-1]))

        smry.add_extra_txt(extra_text)
        return smry
Exemple #17
0
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            returns='text', model_info=None):
    """
    Summarize the fit of a model as a general table plus a parameter table.

    Parameters
    ----------
    yname : string
        optional; if None, ``self.model.endog_names`` is used, falling back
        to ``'y'`` when that attribute is missing
    xname : list of strings
        optional; if None, ``self.model.exog_names`` is used, falling back
        to ``var_#`` for each entry of ``self.params``
    title : string
        optional; the default ``0`` selects a description looked up from
        the model class name (e.g. 'Generalized linear model' for GLM)
    alpha : float
        significance level passed to ``self.conf_int`` and used for the
        confidence interval column label
    returns : string
        'text' (default) or 'print' return the summary as a string.
        'table', 'csv', 'latex' and 'html' are not implemented yet; for
        these, and for any other value, None is returned.
    model_info : None
        not used

    Returns
    -------
    string or None
        The general table and the parameter table rendered with ``str``
        and joined by a newline, or None for an unsupported ``returns``.

    Raises
    ------
    KeyError
        if the model class name is not one of OLS, GLS, GLSAR, WLS, RLM
        or GLM

    Examples (needs updating)
    --------
    >>> import statsmodels as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print(ols_results.summary())
    ...

    Notes
    -----
    The original notes state that conf_int is calculated from the normal
    distribution; that is decided by ``self.conf_int`` -- TODO confirm.
    """
    import time

    # TODO Make sure all self.model.__class__.__name__ are listed
    model_types = {'OLS': 'Ordinary least squares',
                   'GLS': 'Generalized least squares',
                   'GLSAR': 'Generalized least squares with AR(p)',
                   'WLS': 'Weighted least squares',
                   'RLM': 'Robust linear model',
                   'GLM': 'Generalized linear model'
                   }
    modeltype = self.model.__class__.__name__
    if title == 0:
        title = model_types[modeltype]
    if yname is None:
        try:
            yname = self.model.endog_names
        except AttributeError:
            yname = 'y'
    if xname is None:
        try:
            xname = self.model.exog_names
        except AttributeError:
            xname = ['var_%d' % i for i in range(len(self.params))]
    time_now = time.localtime()
    time_of_day = [time.strftime("%H:%M:%S", time_now)]
    date = time.strftime("%a, %d %b %Y", time_now)
    nobs = self.nobs
    df_model = self.df_model
    df_resid = self.df_resid

    # General part of the summary table, Applicable to all? models
    # ------------------------------------------------------------
    # TODO: define this generically, overwrite in model classes
    # NOTE(review): yname is passed as a bare row here while the other rows
    # are one-element lists -- confirm how SimpleTable treats a string row.
    gen_left = [('Model type:', [modeltype]),
                ('Date:', [date]),
                ('Dependent Variable:', yname),  # What happens with multiple names?
                ('df model', [df_model])
                ]
    gen_stubs_left, gen_data_left = zip_longest(*gen_left)  # transpose row col

    gen_title = title
    gen_header = None
    gen_table_left = SimpleTable(gen_data_left,
                                 gen_header,
                                 gen_stubs_left,
                                 title=gen_title,
                                 txt_fmt=gen_fmt
                                 )

    gen_stubs_right = ('Method:',
                       'Time:',
                       'Number of Obs:',
                       'df resid')
    gen_data_right = ([modeltype],  # was dist family need to look at more
                      time_of_day,
                      [nobs],
                      [df_resid]
                      )
    gen_table_right = SimpleTable(gen_data_right,
                                  gen_header,
                                  gen_stubs_right,
                                  title=gen_title,
                                  txt_fmt=gen_fmt)
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    # Parameters part of the summary table
    # ------------------------------------
    # Every model type uses the same statistics, so compute them once
    # instead of once per dictionary entry.
    tstat = self.t()
    prob_stat = self.pvalues

    # Header names for the parameter part of the summary table, looked up
    # by model type; only RLM is labelled with z instead of t.
    alp = str((1-alpha)*100)+'%'
    t_header = ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval']
    z_header = ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval']
    param_header = {
         'OLS': t_header,
         'GLS': t_header,
         'GLSAR': t_header,
         'WLS': t_header,
         'GLM': t_header,  # glm uses t-distribution
         'RLM': z_header   # check z
                   }
    params_stubs = xname
    params = self.params
    conf_int = self.conf_int(alpha)
    std_err = self.bse
    exog_len = lrange(len(xname))
    # Unknown model types still raise KeyError here, as before.
    header = param_header[modeltype]

    # Simpletable should be able to handle the formatting
    params_data = lzip(["%#6.4g" % (params[i]) for i in exog_len],
                       ["%#6.4f" % (std_err[i]) for i in exog_len],
                       ["%#6.4f" % (tstat[i]) for i in exog_len],
                       ["%#6.4f" % (prob_stat[i]) for i in exog_len],
                       ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len])
    parameter_table = SimpleTable(params_data,
                                  header,
                                  params_stubs,
                                  title=None,
                                  txt_fmt=fmt_2,
                                  )

    # output options
    # --------------
    # TODO: JP the rest needs to be fixed, similar to summary in linear_model
    # 'text' is the default value of ``returns``; it used to fall through
    # and return None because only 'print' was handled.
    if returns in ('text', 'print'):
        return str(general_table) + '\n' + str(parameter_table)

    # Remaining output formats are not implemented yet.
    return None
Exemple #18
0
    def summary(self):
        """
        Summarize the fitted Model

        The summary holds a two-column header table and a table with one
        row per entry of ``self.params_formatted``.

        Returns
        -------
        smry : Summary instance
            This holds the summary table and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary
        """
        from statsmodels.iolib.summary import Summary
        from statsmodels.iolib.table import SimpleTable

        model = self.model
        title = model.__class__.__name__ + " Model Results"

        # Name of the dependent variable: taken from the pandas object when
        # the original endog is one, otherwise a generic label.
        orig_endog = self.model.data.orig_endog
        if isinstance(orig_endog, pd.DataFrame):
            dep_variable = orig_endog.columns[0]
        elif isinstance(orig_endog, pd.Series):
            dep_variable = orig_endog.name
        else:
            dep_variable = "endog"

        seasonal_periods = None
        if self.model.seasonal is not None:
            seasonal_periods = self.model.seasonal_periods

        # Display names for the trend / seasonal settings.
        lookup = {
            "add": "Additive",
            "additive": "Additive",
            "mul": "Multiplicative",
            "multiplicative": "Multiplicative",
            None: "None",
        }

        transform = self.params["use_boxcox"]
        box_cox_transform = bool(transform)
        # A string setting is shown as is; otherwise show the stored lambda.
        if isinstance(transform, str):
            box_cox_coeff = transform
        else:
            box_cox_coeff = self.params["lamda"]
        if isinstance(box_cox_coeff, float):
            box_cox_coeff = "{:>10.5f}".format(box_cox_coeff)

        top_left = [
            ("Dep. Variable:", [dep_variable]),
            ("Model:", [model.__class__.__name__]),
            ("Optimized:", [str(np.any(self.optimized))]),
            ("Trend:", [lookup[self.model.trend]]),
            ("Seasonal:", [lookup[self.model.seasonal]]),
            ("Seasonal Periods:", [str(seasonal_periods)]),
            ("Box-Cox:", [str(box_cox_transform)]),
            ("Box-Cox Coeff.:", [str(box_cox_coeff)]),
        ]

        top_right = [
            ("No. Observations:", [str(len(self.model.endog))]),
            ("SSE", ["{:5.3f}".format(self.sse)]),
            ("AIC", ["{:5.3f}".format(self.aic)]),
            ("BIC", ["{:5.3f}".format(self.bic)]),
            ("AICC", ["{:5.3f}".format(self.aicc)]),
            ("Date:", None),
            ("Time:", None),
        ]

        smry = Summary()
        smry.add_table_2cols(self,
                             gleft=top_left,
                             gright=top_right,
                             title=title)
        formatted = self.params_formatted  # type: pd.DataFrame

        def _fmt(x):
            # Right-align in 20 characters: NaN as text, very large or very
            # small magnitudes in general format, everything else in fixed
            # point with at most 7 decimals.
            magnitude = np.abs(x)
            if np.isnan(x):
                return f"{str(x):>20}"
            exponent = int(np.log10(magnitude)) if magnitude != 0 else 1
            if not (-3 <= exponent <= 4):
                return "{:>20.5g}".format(x)
            decimals = min(7 - exponent, 7)
            return "{{:>20.{0}f}}".format(decimals).format(x)

        # Columns are read by position: 0 -> code, 1 -> coeff, 2 -> optimized.
        tab = [
            [
                _fmt(row.iloc[1]),
                "{0:>20}".format(row.iloc[0]),
                "{0:>20}".format(str(bool(row.iloc[2]))),
            ]
            for _, row in formatted.iterrows()
        ]
        params_table = SimpleTable(
            tab,
            headers=["coeff", "code", "optimized"],
            title="",
            stubs=list(formatted.index),
        )

        smry.tables.append(params_table)

        return smry