def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot observed and fitted values against one regressor.

    This creates one graph with the observed dependent variable drawn as
    points, the fitted values drawn as a line, and a shaded band between the
    lower and upper bounds returned by ``wls_prediction_std``. All series
    are ordered by the selected regressor before plotting.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes; it is also passed to ``wls_prediction_std``
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot as a line.
        It is expected to be in the same (unsorted) order as model.endog.
    ax : None or matplotlib axis instance
        If ax is given then the plot is attached to it, otherwise a new
        figure is created and returned.

    Returns
    -------
    fig_or_ax : matplotlib figure or axis instance
        If ax was given as parameter then the plot is attached to it and the
        same axis is returned, otherwise the newly created figure is
        returned.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    """
    # maybe add option for wendog, wexog
    endog = res.model.endog
    regressor = res.model.exog[:, exog_idx]

    # Sort everything by the regressor so lines and the band run left to right.
    order = np.argsort(regressor)
    x1 = regressor[order]

    if ax is None:
        # Only needed when we have to create the figure ourselves.
        import matplotlib.pyplot as plt
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig_or_ax = fig
    else:
        fig_or_ax = ax

    ax.plot(x1, endog[order], 'bo')
    if y_true is not None:
        # asarray: plain lists/tuples do not support indexing by an array.
        ax.plot(x1, np.asarray(y_true)[order], 'b-')
        title = 'fitted versus regressor %d, blue: true, black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[order], 'k-')
    ax.fill_between(x1, iv_l[order], iv_u[order], alpha=0.1, color='k')
    ax.set_title(title)

    return fig_or_ax
def summary_obs(res, alpha=0.05):
    """Build a per-observation summary table for a fitted regression.

    For every observation the table lists the observed and fitted value, the
    standard error and confidence interval of the predicted mean, the
    interval returned by ``wls_prediction_std`` for a predicted observation,
    the residual with its standard error, the internally studentized
    residual and Cook's distance.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, resid, mse_resid, df_resid, nobs
        and model.endog as attributes; it is also passed to ``Influence``
        and ``wls_prediction_std``
    alpha : float
        significance level for both confidence intervals; the column headers
        report the matching ``100 * (1 - alpha)`` percent level

    Returns
    -------
    st : SimpleTable instance
        table built from ``data`` with ``ss2`` as headers
    data : ndarray
        the table values, one row per observation, rounded to 4 decimals
    ss2 : list of str
        column names

    """
    from copy import deepcopy

    from scipy import stats
    from scikits.statsmodels.iolib.table import SimpleTable, default_html_fmt
    from scikits.statsmodels.iolib.tableformatting import fmt_base
    from scikits.statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    # standard error for predicted mean
    # Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    half_width = tppf * predict_mean_se
    predict_mean_ci_low = res.fittedvalues - half_width
    predict_mean_ci_upp = res.fittedvalues + half_width

    # standard error for predicted observation; pass alpha through so this
    # interval uses the same level as the mean interval above
    _, predict_ci_low, predict_ci_upp = wls_prediction_std(res, alpha=alpha)

    # standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    data = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci_low,
        predict_mean_ci_upp,
        predict_ci_low,
        predict_ci_upp,
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance()[0],
    ])
    data = np.round(data, 4)

    # '%g' renders the default alpha=0.05 as '95%', matching the old headers.
    level = '%g%%' % (100 * (1 - alpha))
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict',
           'Mean ci\n%s low' % level, 'Mean ci\n%s upp' % level,
           'Predict ci\n%s low' % level, 'Predict ci\n%s upp' % level,
           'Residual', 'Std Error\nResidual', 'Student\nResidual',
           "Cook's\nD"]

    # Copy the shared format templates so the module-level defaults are not
    # modified by the per-table data formats set here.
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)

    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot fit against one regressor.

    This creates one graph with the observed dependent variable drawn as
    points, the fitted values drawn as a line, and a shaded band between the
    lower and upper bounds returned by ``wls_prediction_std``. All series
    are ordered by the selected regressor before plotting.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes; it is also passed to ``wls_prediction_std``
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot as a line.
        It is expected to be in the same (unsorted) order as model.endog.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    """
    fig, ax = utils.create_mpl_ax(ax)

    # maybe add option for wendog, wexog
    endog = res.model.endog
    regressor = res.model.exog[:, exog_idx]

    # Sort everything by the regressor so lines and the band run left to right.
    order = np.argsort(regressor)
    x1 = regressor[order]

    ax.plot(x1, endog[order], 'bo')
    if y_true is not None:
        # asarray: plain lists/tuples do not support indexing by an array.
        ax.plot(x1, np.asarray(y_true)[order], 'b-')
        title = 'fitted versus regressor %d, blue: true, black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[order], 'k-')
    ax.fill_between(x1, iv_l[order], iv_u[order], alpha=0.1, color='k')
    ax.set_title(title)

    return fig
def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Plot fit against one regressor.

    This creates one graph with the observed dependent variable drawn as
    points, the fitted values drawn as a line, and a shaded band between the
    lower and upper bounds returned by ``wls_prediction_std``. All series
    are ordered by the selected regressor before plotting.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes; it is also passed to ``wls_prediction_std``
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str, optional
        Name of the regressor used in the title. If empty, the name
        'variable <exog_idx>' is used.
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot as a line.
        It is expected to be in the same (unsorted) order as model.endog.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    fontsize : str or int, optional
        Passed on as the ``fontsize`` of the title.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    The plotted series carry the labels 'observed', 'true' and 'fitted', but
    no legend is drawn here; call ``ax.legend()`` on the axis to show them.

    """
    fig, ax = utils.create_mpl_ax(ax)

    if not exog_name:
        exog_name = 'variable %d' % exog_idx

    # maybe add option for wendog, wexog
    endog = res.model.endog
    regressor = res.model.exog[:, exog_idx]

    # Sort everything by the regressor so lines and the band run left to right.
    order = np.argsort(regressor)
    x1 = regressor[order]

    ax.plot(x1, endog[order], 'bo', label='observed')
    if y_true is not None:
        # asarray: plain lists/tuples do not support indexing by an array.
        ax.plot(x1, np.asarray(y_true)[order], 'b-', label='true')

    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[order], 'k-', label='fitted')
    ax.fill_between(x1, iv_l[order], iv_u[order], alpha=0.1, color='k')

    # The title is the same whether or not y_true was supplied.
    ax.set_title('fitted versus regressor %s' % exog_name, fontsize=fontsize)

    return fig
def plot_fit(res, exog_idx, y_true=None, ax=None):
    """Plot fit against one regressor.

    This creates one graph with the observed dependent variable drawn as
    points, the fitted values drawn as a line, and a shaded band between the
    lower and upper bounds returned by ``wls_prediction_std``. All series
    are ordered by the selected regressor before plotting, so that the line
    and the band are drawn left to right even when the data are not sorted.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes; it is also passed to ``wls_prediction_std``
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot as a line.
        It is expected to be in the same (unsorted) order as model.endog.
    ax : None or matplotlib axis instance
        If ax is given then the plot is attached to it, otherwise a new
        figure is created and returned.

    Returns
    -------
    fig_or_ax : matplotlib figure or axis instance
        If ax was given as parameter then the same axis is returned,
        otherwise the newly created figure is returned.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    TODO: check for which other models than OLS this also works, use
    model.name

    """
    # Imported locally so this function does not rely on a module-level
    # numpy import.
    import numpy as np

    # maybe add option for wendog, wexog
    endog = res.model.endog
    regressor = res.model.exog[:, exog_idx]

    # Line plots and fill_between connect points in array order; sort by the
    # regressor so an unsorted exog column does not produce a zigzag.
    order = np.argsort(regressor)
    x1 = regressor[order]

    if ax is None:
        # Only needed when we have to create the figure ourselves.
        import matplotlib.pyplot as plt
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig_or_ax = fig
    else:
        fig_or_ax = ax

    ax.plot(x1, endog[order], 'bo')
    if y_true is not None:
        # asarray: plain lists/tuples do not support indexing by an array.
        ax.plot(x1, np.asarray(y_true)[order], 'b-')
        title = 'fitted versus regressor %d, blue: true, black: OLS' % exog_idx
    else:
        title = 'fitted versus regressor %d, blue: observed, black: OLS' % exog_idx

    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[order], 'k-')
    ax.fill_between(x1, iv_l[order], iv_u[order], alpha=0.1, color='k')
    ax.set_title(title)

    return fig_or_ax
# Simulate data that are nonlinear in x1 but linear in the parameters, fit
# the model by OLS and plot the fit with the wls_prediction_std bounds.
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)]
beta = [0.5, 0.5, -0.02, 5.]
y_true = np.dot(X, beta)
y = y_true + sig * np.random.normal(size=nsample)

plt.figure()
plt.plot(x1, y, 'o', x1, y_true, 'b-')

res = sm.OLS(y, X).fit()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(res.params)
print(res.bse)
# NOTE: res.model.predict is not printed here; the original author noted a
# bug: "predict requires call to model.results".
prstd, iv_l, iv_u = wls_prediction_std(res)
plt.plot(x1, res.fittedvalues, 'r--.')
plt.plot(x1, iv_u, 'r--')
plt.plot(x1, iv_l, 'r--')
plt.title('blue: true, red: OLS')

print(res.summary())


# OLS with dummy variables
# ------------------------

sig = 1.
# suppose observations from 3 groups
xg = np.zeros(nsample, int)
xg[20:40] = 1
xg[40:] = 2
# Simulate data that are nonlinear in x1 but linear in the parameters, fit
# the model by OLS and plot the fit with the wls_prediction_std bounds.
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, np.sin(x1), (x1-5)**2, np.ones(nsample)]
beta = [0.5, 0.5, -0.02, 5.]
y_true = np.dot(X, beta)
y = y_true + sig * np.random.normal(size=nsample)

plt.figure()
plt.plot(x1, y, 'o', x1, y_true, 'b-')

res = sm.OLS(y, X).fit()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(res.params)
print(res.bse)
# NOTE: res.model.predict is not printed here; the original author noted a
# bug: "predict requires call to model.results".
prstd, iv_l, iv_u = wls_prediction_std(res)
plt.plot(x1, res.fittedvalues, 'r--.')
plt.plot(x1, iv_u, 'r--')
plt.plot(x1, iv_l, 'r--')
plt.title('blue: true, red: OLS')

print(res.summary())


# OLS with dummy variables
# ------------------------

sig = 1.
# suppose observations from 3 groups
xg = np.zeros(nsample, int)
xg[20:40] = 1