def cook_distance(self):
    """Compute and plot Cook's distance for an OLS fit of the stored data.

    Fits ``self.target_`` on ``self.features_`` (with an added constant
    column) using statsmodels OLS, then draws a stem plot with one stem per
    observation whose height is that observation's Cook's distance.

    The figure is displayed via ``plt.show()``; nothing is returned.
    """
    import statsmodels.api as sm
    from statsmodels.stats.outliers_influence import OLSInfluence

    design = sm.add_constant(self.features_)
    ols_fit = sm.OLS(self.target_, design).fit()

    # cooks_distance is a (distances, second value) pair; only the distances
    # are plotted, so the second element is discarded.
    distances, _ = OLSInfluence(ols_fit).cooks_distance

    plt.figure(figsize=(8, 5))
    plt.title("Cook's distance plot for the residuals", fontsize=14)
    # `use_line_collection` is intentionally not passed: Matplotlib removed
    # the keyword in 3.8 (line collections have been the default since 3.3),
    # so passing it raises TypeError on current releases.
    plt.stem(np.arange(len(distances)), distances, markerfmt=",")
    plt.grid(True)
    plt.show()
# Histogram of the normalized (Pearson) residuals.
# NOTE: the edge colour must be lowercase 'k'; uppercase single-letter colour
# codes such as 'K' are rejected by Matplotlib >= 3.3.
plt.hist(fitted.resid_pearson, bins=8, edgecolor='k')
plt.ylabel("Count", fontsize=15)
plt.xlabel("Normalized Residuals", fontsize=15)
plt.title("Histogram of normalized residuals", fontsize=18)
plt.show()

# Quantile-Quantile Plot
plt.figure(figsize=(8, 5))
# `fit` is a boolean flag; the original string 'True' only worked because any
# non-empty string is truthy.
fig = qqplot(fitted.resid_pearson, line="45", fit=True)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel("Theoretical Quantiles", fontsize=15)
plt.ylabel("Sample Quantiles", fontsize=15)
plt.title("Q-Q plot of normalized residuals", fontsize=18)
plt.grid(True)
plt.show()

# Checking for outliers in residuals
# Green dashed line marks 4x the mean Cook's distance
inf = influence(fitted)
(c, p) = inf.cooks_distance
plt.figure(figsize=(8, 5))
plt.title("Cook's distance plot for the residuals", fontsize=16)
# `use_line_collection` is not passed: the keyword was removed in
# Matplotlib 3.8 and line collections are already the default.
plt.stem(np.arange(len(c)), c, markerfmt=",")
# The x-axis is the observation index, so the threshold line must span
# 0..len(c) (with the original 10% overshoot). The previous
# `fitted.fittedvalues * 1.1` passed an array of fitted responses as xmax.
plt.hlines(y=c.mean() * 4, xmin=0, xmax=len(c) * 1.1,
           colors='green', linestyle='--')
plt.grid(True)
plt.show()
fig.tight_layout()
# Drop the bottom-right panel; only three of the grid's axes are used.
fig.delaxes(axs[1, 1])

# Top-right panel: fitted values vs. residuals, with a zero reference line.
axs[0, 1].scatter(x=results.fittedvalues, y=results.resid, edgecolor='k')
xmin = min(results.fittedvalues)
xmax = max(results.fittedvalues)
axs[0, 1].hlines(y=0, xmin=xmin * 0.9, xmax=xmax * 1.1,
                 color='red', linestyle='--', lw=3)
axs[0, 1].set_xlabel("Fitted values", fontsize=10)
axs[0, 1].set_ylabel("Residuals", fontsize=10)
axs[0, 1].set_title("Fitted vs. residuals plot", fontsize=10)

# Bottom-left panel: Q-Q plot of the normalized residuals.
# Draw directly on axs[1, 0] instead of `plot=plt`, which targets whatever
# the implicit "current axes" happens to be; the labels below are set on
# axs[1, 0], so the plot itself must land there too.
stats.probplot(results.resid_pearson, plot=axs[1, 0], fit=True)
axs[1, 0].set_xlabel("Theoretical quantiles", fontsize=10)
axs[1, 0].set_ylabel("Sample quantiles", fontsize=10)
axs[1, 0].set_title("Q-Q plot of normalized residuals", fontsize=10)

# Top-left panel: Cook's distance, one stem per observation.
inf = influence(results)
(c, p) = inf.cooks_distance
axs[0, 0].stem(np.arange(len(c)), c, markerfmt=",")
axs[0, 0].set_title("Cook's distance plot for the residuals", fontsize=10)

plt.subplots_adjust(left=0.1, wspace=0.4, hspace=0.4)
plt.show()

# fit OLS model with explanatory variables
X = US_cases_latest_week[['percent_age65over', 'percent_female', 'percent_black']]
Y = US_cases_latest_week['cases_count_pos']
# add_constant appends the intercept column to the design matrix
X = sm.add_constant(X)
model_last_week2 = sm.OLS(Y, X)
results2 = model_last_week2.fit()
print(results2.summary())

# model diagnostics