def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values, together with a shaded band between the lower and upper
    bounds returned by ``wls_prediction_std``.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot.
        It must have one entry per observation, in the same order as
        model.endog.
    ax : None or matplotlib axis instance
        If ax is given then the plot is attached to it, otherwise a new figure
        is created and returned.

    Returns
    -------
    fig_or_ax : matplotlib figure or axis instance
        If ax was given as parameter then the plot is attached to it, otherwise
        a new figure is created. Either the figure or the given axis is returned.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    ``np`` and ``wls_prediction_std`` are not imported here and are expected
    to be available at module level.

    """
    import matplotlib.pyplot as plt

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # order every series by the regressor so that the line plots and the
    # shaded band run from left to right
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig_or_ax = fig
    else:
        fig_or_ax = ax

    ax.plot(x1, y, 'bo')
    if y_true is not None:
        # array_like input (list, tuple) cannot be indexed with an index
        # array, so convert it to an ndarray first
        y_true = np.asarray(y_true)
        ax.plot(x1, y_true[x1_argsort], 'b-')
        title = 'fitted versus regressor %d, blue: true,   black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    # only the interval bounds are plotted, the standard error is not needed
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')
    ax.set_title(title)

    return fig_or_ax
def summary_obs(res, alpha=0.05):
    """Build a table with per-observation fit and influence statistics.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, resid, mse_resid, df_resid, nobs
        and model.endog as attributes
    alpha : float
        significance level for the confidence intervals of the predicted
        mean and of the predicted observation. The default 0.05 gives 95%
        intervals.

    Returns
    -------
    st : SimpleTable instance
        table built from `data` with `ss2` as headers
    data : ndarray
        the table values, one row per observation, rounded to 4 decimals
    ss2 : list of str
        column names of `data`

    Notes
    -----
    ``np`` and ``Influence`` are not imported here and are expected to be
    available at module level.

    """
    from copy import deepcopy

    from scipy import stats
    from scikits.statsmodels.iolib.table import SimpleTable, default_html_fmt
    from scikits.statsmodels.iolib.tableformatting import fmt_base
    from scikits.statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci = np.column_stack([
        res.fittedvalues - tppf * predict_mean_se,
        res.fittedvalues + tppf * predict_mean_se])

    #standard error for predicted observation
    # pass alpha on, otherwise the prediction interval silently stays at the
    # helper's default level while the mean interval follows alpha
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(
        res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    table_sm = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci[:, 0],
        predict_mean_ci[:, 1],
        predict_ci[:, 0],
        predict_ci[:, 1],
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance()[0]
        ])

    # label the interval columns with the level that was actually used;
    # for the default alpha this is '95%'
    ci_label = '%g%%' % (100 * (1 - alpha))
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict',
           'Mean ci\n%s low' % ci_label, 'Mean ci\n%s upp' % ci_label,
           'Predict ci\n%s low' % ci_label, 'Predict ci\n%s upp' % ci_label,
           'Residual', 'Std Error\nResidual', 'Student\nResidual',
           "Cook's\nD"]
    colnames = ss2
    data = np.round(table_sm, 4)

    # copy the shared format dicts so the module-level defaults stay untouched
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    # first column is the observation number, all others are floats
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=colnames, txt_fmt=fmt,
                     html_fmt=fmt_html)

    return st, data, ss2
Example #3
0
def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values, together with a shaded band between the lower and upper
    bounds returned by ``wls_prediction_std``.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot.
        It must have one entry per observation, in the same order as
        model.endog.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    """
    fig, ax = utils.create_mpl_ax(ax)

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # order every series by the regressor so that the line plots and the
    # shaded band run from left to right
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    ax.plot(x1, y, 'bo')
    if y_true is not None:
        # array_like input (list, tuple) cannot be indexed with an index
        # array, so convert it to an ndarray first
        y_true = np.asarray(y_true)
        ax.plot(x1, y_true[x1_argsort], 'b-')
        title = 'fitted versus regressor %d, blue: true,   black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    # only the interval bounds are plotted, the standard error is not needed
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-')
    ax.fill_between(x1,
                    iv_l[x1_argsort],
                    iv_u[x1_argsort],
                    alpha=0.1,
                    color='k')
    ax.set_title(title)

    return fig
Example #4
0
def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values, together with a shaded band between the lower and upper
    bounds returned by ``wls_prediction_std``.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str
        (optional) name of the regressor used in the title. If it is the
        empty string, then 'variable <exog_idx>' is used.
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot.
        It must have one entry per observation, in the same order as
        model.endog.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    fontsize : str or number
        (optional) font size that is passed to ``ax.set_title``

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, few options yet. The plotted lines carry
    labels ('observed', 'true', 'fitted') but no legend is drawn here.

    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # order every series by the regressor so that the line plots and the
    # shaded band run from left to right
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    ax.plot(x1, y, 'bo', label='observed')
    if y_true is not None:
        # array_like input (list, tuple) cannot be indexed with an index
        # array, so convert it to an ndarray first
        y_true = np.asarray(y_true)
        ax.plot(x1, y_true[x1_argsort], 'b-', label='true')

    # the title does not depend on whether y_true is given
    title = 'fitted versus regressor %s' % exog_name

    # only the interval bounds are plotted, the standard error is not needed
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-', label='fitted')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')
    ax.set_title(title, fontsize=fontsize)

    return fig
def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot fit against one regressor.

    check for which other models than OLS this also works, use model.name

    This creates one graph with the scatterplot of observed values compared to
    fitted values, together with a shaded band between the lower and upper
    bounds returned by ``wls_prediction_std``.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot.
        It must have one entry per observation, in the same order as
        model.endog.
    ax : None or matplotlib axis instance
        If ax is given then the plot is attached to it, otherwise a new figure
        is created and returned.

    Returns
    -------
    fig_or_ax : matplotlib figure or axis instance
        The newly created figure if `ax` was None, otherwise the given axis.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    ``np`` and ``wls_prediction_std`` are not imported here and are expected
    to be available at module level.

    """
    import matplotlib.pyplot as plt

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # Lines and the shaded band connect points in the order given, so all
    # series are sorted by the regressor; unsorted x values would produce a
    # zigzag line and a garbled band.
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig_or_ax = fig
    else:
        fig_or_ax = ax

    ax.plot(x1, y, 'bo')
    if y_true is not None:
        # array_like input (list, tuple) cannot be indexed with an index
        # array, so convert it to an ndarray first
        y_true = np.asarray(y_true)
        ax.plot(x1, y_true[x1_argsort], 'b-')
        title = 'fitted versus regressor %d, blue: true,   black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    # only the interval bounds are plotted, the standard error is not needed
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')
    ax.set_title(title)

    return fig_or_ax
Example #6
0
# NOTE(review): this fragment uses nsample, np, plt, sm and
# wls_prediction_std without defining them; they are presumably set up
# earlier in the script -- confirm.

# Simulate a response that is linear in the columns of X plus normal noise
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)]
beta = [0.5, 0.5, -0.02, 5.]
y_true = np.dot(X, beta)
y = y_true + sig * np.random.normal(size=nsample)

plt.figure()
plt.plot(x1, y, 'o', x1, y_true, 'b-')

# Fit by OLS and overlay fitted values and the prediction interval bounds.
# print is called with a single argument so that the output is the same
# under Python 2 and Python 3.
res = sm.OLS(y, X).fit()
print(res.params)
print(res.bse)
#current bug predict requires call to model.results
#print res.model.predict
prstd, iv_l, iv_u = wls_prediction_std(res)
plt.plot(x1, res.fittedvalues, 'r--.')
plt.plot(x1, iv_u, 'r--')
plt.plot(x1, iv_l, 'r--')
plt.title('blue: true,   red: OLS')

print(res.summary())

#OLS with dummy variables
#------------------------

sig = 1.
#suppose observations from 3 groups
xg = np.zeros(nsample, int)
xg[20:40] = 1
xg[40:] = 2
# NOTE(review): from here on the first example is repeated verbatim and sig
# is reset to 0.5; xg is not used in the repeated part.
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, np.sin(x1), (x1-5)**2, np.ones(nsample)]
beta = [0.5, 0.5, -0.02, 5.]
y_true = np.dot(X, beta)
y = y_true + sig * np.random.normal(size=nsample)

plt.figure()
plt.plot(x1, y, 'o', x1, y_true, 'b-')

res = sm.OLS(y, X).fit()
print(res.params)
print(res.bse)
#current bug predict requires call to model.results
#print res.model.predict
prstd, iv_l, iv_u = wls_prediction_std(res)
plt.plot(x1, res.fittedvalues, 'r--.')
plt.plot(x1, iv_u, 'r--')
plt.plot(x1, iv_l, 'r--')
plt.title('blue: true,   red: OLS')

print(res.summary())


#OLS with dummy variables
#------------------------

sig = 1.
#suppose observations from 3 groups
xg = np.zeros(nsample, int)
xg[20:40] = 1