def plot_fit(res, exog_idx, exog_name="", y_true=None, ax=None, fontsize="small"):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared
    to fitted values.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str, optional
        Name of the regressor used in the plot title. If empty (the
        default), "variable <exog_idx>" is used instead.
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure
        being created.
    fontsize : str or int, optional
        Font size that is passed on to ``ax.set_title``.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.
    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == "":
        exog_name = "variable %d" % exog_idx

    # maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # Everything is drawn in order of increasing regressor value so that the
    # line plots and the filled band are not criss-crossed.
    x1_argsort = np.argsort(x1)
    y = res.model.endog[x1_argsort]
    x1 = x1[x1_argsort]

    ax.plot(x1, y, "bo", label="observed")
    if y_true is not None:
        # y_true is documented as array_like; a plain list cannot be indexed
        # with an integer array, so convert it first.
        ax.plot(x1, np.asarray(y_true)[x1_argsort], "b-", label="true")

    # Only the lower/upper bounds are needed; the first return value is unused.
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], "k-", label="fitted")
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color="k")
    ax.set_title("fitted versus regressor %s" % exog_name, fontsize=fontsize)

    return fig
def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared
    to fitted values.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str, optional
        Name of the regressor used in the plot title. If empty (the
        default), 'variable <exog_idx>' is used instead.
    y_true : array_like (optional)
        If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure
        being created.
    fontsize : str or int, optional
        Font size that is passed on to ``ax.set_title``.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.
    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # Everything is drawn in order of increasing regressor value so that the
    # line plots and the filled band are not criss-crossed.
    x1_argsort = np.argsort(x1)
    y = res.model.endog[x1_argsort]
    x1 = x1[x1_argsort]

    ax.plot(x1, y, 'bo', label='observed')
    if y_true is not None:
        # y_true is documented as array_like; a plain list cannot be indexed
        # with an integer array, so convert it first.
        ax.plot(x1, np.asarray(y_true)[x1_argsort], 'b-', label='true')

    # Only the lower/upper bounds are needed; the first return value is unused.
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-', label='fitted')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')
    ax.set_title('fitted versus regressor %s' % exog_name, fontsize=fontsize)

    return fig
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
        fitted regression results; this function reads fittedvalues, resid,
        mse_resid, df_resid, nobs and model.endog from it
    alpha : float
       significance level for confidence interval

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations)

    Notes
    -----
    The column headers contain a fixed "95%" label; they are not adjusted
    when `alpha` differs from 0.05.
    '''
    from copy import deepcopy

    from scipy import stats

    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci = np.column_stack([
        res.fittedvalues - tppf * predict_mean_se,
        res.fittedvalues + tppf * predict_mean_se])

    #interval for predicted observation
    # alpha is passed through so that both interval pairs in the table use
    # the level requested by the caller (previously the default was used).
    _, predict_ci_low, predict_ci_upp = wls_prediction_std(res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    # One row per observation; the first column is a 1-based observation number.
    data = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci[:, 0],
        predict_mean_ci[:, 1],
        predict_ci[:, 0],
        predict_ci[:, 1],
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance[0],
    ])

    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict', 'Mean ci\n95% low', 'Mean ci\n95% upp',
           'Predict ci\n95% low', 'Predict ci\n95% upp', 'Residual',
           'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"]

    # Copy the shared format templates so the module-level defaults are not
    # mutated when the data formats are overridden below.
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    # Integer format for the observation number, fixed-point for the rest.
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)

    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : results instance
        fitted regression results; this function reads fittedvalues, resid,
        mse_resid, df_resid, nobs and model.endog from it
    alpha : float
       significance level for confidence interval

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations)

    Notes
    -----
    The column headers contain a fixed "95%" label; they are not adjusted
    when `alpha` differs from 0.05.
    '''
    from copy import deepcopy

    from scipy import stats

    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci = np.column_stack([
        res.fittedvalues - tppf * predict_mean_se,
        res.fittedvalues + tppf * predict_mean_se
    ])

    #interval for predicted observation
    # alpha is passed through so that both interval pairs in the table use
    # the level requested by the caller (previously the default was used).
    _, predict_ci_low, predict_ci_upp = wls_prediction_std(res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    # One row per observation; the first column is a 1-based observation number.
    data = np.column_stack([
        np.arange(res.nobs) + 1,
        res.model.endog,
        res.fittedvalues,
        predict_mean_se,
        predict_mean_ci[:, 0],
        predict_mean_ci[:, 1],
        predict_ci[:, 0],
        predict_ci[:, 1],
        res.resid,
        resid_se,
        infl.resid_studentized_internal,
        infl.cooks_distance[0],
    ])

    ss2 = [
        'Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
        'Std Error\nMean Predict', 'Mean ci\n95% low', 'Mean ci\n95% upp',
        'Predict ci\n95% low', 'Predict ci\n95% upp', 'Residual',
        'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"
    ]

    # Copy the shared format templates so the module-level defaults are not
    # mutated when the data formats are overridden below.
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    # Integer format for the observation number, fixed-point for the rest.
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)

    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
# Demo fragment: builds exog0 from x1 and x2 via sm.add_constant(..., prepend=False),
# fits sm.OLS(y, exog0), and then -- guarded by plot_old, which is set to 0 here --
# draws the older diagnostic plots: fitted values with the wls_prediction_std
# bounds, the residuals, and residuals against x1 in the first cell of a 2x1
# subplot grid.
# NOTE(review): all statements are collapsed onto one physical line that starts
# with '#', so as written the whole line parses as a comment. Which statements
# belong under `if plot_old:` cannot be read off this layout, and the fragment
# stops right after the second subplot is created -- confirm against the
# uncollapsed script before restoring line breaks.
# estimate only linear function, misspecified because of non-linear terms exog0 = sm.add_constant(np.c_[x1, x2], prepend=False) # plt.figure() # plt.plot(x1, y, 'o', x1, y_true, 'b-') res = sm.OLS(y, exog0).fit() # print res.params # print res.bse plot_old = 0 # True if plot_old: # current bug predict requires call to model.results # print res.model.predict prstd, iv_l, iv_u = wls_prediction_std(res) plt.plot(x1, res.fittedvalues, "r-o") plt.plot(x1, iv_u, "r--") plt.plot(x1, iv_l, "r--") plt.title("blue: true, red: OLS") plt.figure() plt.plot(res.resid, "o") plt.title("Residuals") fig2 = plt.figure() ax = fig2.add_subplot(2, 1, 1) # namestr = ' for %s' % self.name if self.name else '' plt.plot(x1, res.resid, "o") ax.set_title("residuals versus exog") # + namestr) ax = fig2.add_subplot(2, 1, 2)
# Demo fragment: builds exog0 from x1 and x2 via sm.add_constant(..., prepend=False),
# fits sm.OLS(y, exog0), and then -- guarded by plot_old, which is set to 0 here --
# draws the older diagnostic plots: fitted values with the wls_prediction_std
# bounds, the residuals, and residuals against x1 in the first cell of a 2x1
# subplot grid.
# NOTE(review): all statements are collapsed onto one physical line, so as
# written only the exog0 assignment is live code and everything after the first
# '#' parses as a comment. Which statements belong under `if plot_old:` cannot
# be read off this layout, and the fragment stops right after the second
# subplot is created -- confirm against the uncollapsed script before restoring
# line breaks.
exog0 = sm.add_constant(np.c_[x1, x2], prepend=False) # plt.figure() # plt.plot(x1, y, 'o', x1, y_true, 'b-') res = sm.OLS(y, exog0).fit() #print res.params #print res.bse plot_old = 0 #True if plot_old: #current bug predict requires call to model.results #print res.model.predict prstd, iv_l, iv_u = wls_prediction_std(res) plt.plot(x1, res.fittedvalues, 'r-o') plt.plot(x1, iv_u, 'r--') plt.plot(x1, iv_l, 'r--') plt.title('blue: true, red: OLS') plt.figure() plt.plot(res.resid, 'o') plt.title('Residuals') fig2 = plt.figure() ax = fig2.add_subplot(2,1,1) #namestr = ' for %s' % self.name if self.name else '' plt.plot(x1, res.resid, 'o') ax.set_title('residuals versus exog')# + namestr) ax = fig2.add_subplot(2,1,2)