def cook_distance(self):
    """Fit an OLS model and plot Cook's distance for each observation.

    Regresses ``self.target_`` on ``self.features_`` (with a constant
    column added) using statsmodels OLS, computes Cook's distance from the
    fitted model's influence measures, and shows the distances as a stem
    plot indexed by observation position.

    Relies on ``plt`` and ``np`` being available at module level.
    Returns nothing; the figure is displayed with ``plt.show()``.
    """
    import statsmodels.api as sm
    from statsmodels.stats.outliers_influence import OLSInfluence as influence

    lm = sm.OLS(self.target_, sm.add_constant(self.features_)).fit()
    # cooks_distance yields a (distances, p-values) pair; only the
    # distances are plotted.
    c, _ = influence(lm).cooks_distance

    plt.figure(figsize=(8, 5))
    plt.title("Cook's distance plot for the residuals", fontsize=14)
    # `use_line_collection` is no longer passed: Matplotlib removed the
    # keyword from stem() in 3.8 and its behaviour is now the default.
    plt.stem(np.arange(len(c)), c, markerfmt=",")
    plt.grid(True)
    plt.show()
예제 #2
0
# Histogram of the Pearson residuals of the fitted model.
# Single-letter colour codes must be lowercase ('k' = black); uppercase
# single-letter codes are rejected by current Matplotlib releases.
plt.hist(fitted.resid_pearson, bins=8, edgecolor='k')
plt.ylabel("Count", fontsize=15)
plt.xlabel("Normalized Residuals", fontsize=15)
plt.title("Histogram of normalized residuals", fontsize=18)
plt.show()

# Quantile-Quantile Plot
plt.figure(figsize=(8, 5))
# Pass the axes of the figure created above: without `ax`, qqplot opens a
# new figure of its own and the 8x5 figure would be left empty.
# `fit` is a boolean flag, so pass True rather than the string 'True'.
fig = qqplot(fitted.resid_pearson, line="45", fit=True, ax=plt.gca())
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel("Theoretical Quantiles", fontsize=15)
plt.ylabel("Sample Quantiles", fontsize=15)
plt.title("Q-Q plot of normalized residuals", fontsize=18)
plt.grid(True)
plt.show()

# Checking for outliers in residuals
# The green dashed line is drawn at 4x the mean Cook's distance.
inf = influence(fitted)
(c, p) = inf.cooks_distance
plt.figure(figsize=(8, 5))
plt.title("Cook's distance plot for the residuals", fontsize=16)
# `use_line_collection` is no longer passed: Matplotlib removed the keyword
# from stem() in 3.8 and its behaviour is now the default.
plt.stem(np.arange(len(c)), c, markerfmt=",")
# The x axis is the observation index, so the line must span 0..len(c).
# Passing the fitted values as xmax drew one line per observation, each
# with a length taken from the response scale instead of the index scale.
plt.hlines(y=c.mean() * 4,
           xmin=0,
           xmax=len(c),
           colors='green',
           linestyle='--')
plt.grid(True)
plt.show()
예제 #3
0
fig.tight_layout()
# Remove the axes at grid position [1, 1] from the figure.
fig.delaxes(axs[1, 1])

# Fitted values against residuals, with a dashed red reference line at zero.
axs[0, 1].scatter(x=results.fittedvalues, y=results.resid, edgecolor='k')
xmin = results.fittedvalues.min()
xmax = results.fittedvalues.max()
# Pad the reference line by 10% of the data range on each side. Scaling the
# end points (xmin*0.9, xmax*1.1) shortens the line instead of extending it
# whenever the fitted values are negative.
x_pad = 0.1 * (xmax - xmin)
axs[0, 1].hlines(y=0, xmin=xmin - x_pad, xmax=xmax + x_pad,
                 color='red', linestyle='--', lw=3)
axs[0, 1].set_xlabel("Fitted values", fontsize=10)
axs[0, 1].set_ylabel("Residuals", fontsize=10)
axs[0, 1].set_title("Fitted vs. residuals plot", fontsize=10)

# Draw the probability plot on axs[1, 0], the same axes that is labelled
# below. With `plot=plt` the points go to whatever axes pyplot currently
# treats as active, which need not be this one.
stats.probplot(results.resid_pearson, plot=axs[1, 0], fit=True)
axs[1, 0].set_xlabel("Theoretical quantiles", fontsize=10)
axs[1, 0].set_ylabel("Sample quantiles", fontsize=10)
axs[1, 0].set_title("Q-Q plot of normalized residuals", fontsize=10)

# Cook's distance of the fitted model, drawn as a stem plot on axs[0, 0]
# against the observation position.
inf = influence(results)
c, p = inf.cooks_distance
obs_index = np.arange(len(c))
axs[0, 0].set_title("Cook's distance plot for the residuals", fontsize=10)
axs[0, 0].stem(obs_index, c, markerfmt=",")

# Final spacing between the panels, then display the figure.
plt.subplots_adjust(left=0.1, wspace=0.4, hspace=0.4)
plt.show()

# Fit an OLS model of the 'cases_count_pos' column on three explanatory
# columns plus an intercept, then print the regression summary.
explanatory_columns = ['percent_age65over', 'percent_female', 'percent_black']
X = sm.add_constant(US_cases_latest_week[explanatory_columns])
Y = US_cases_latest_week['cases_count_pos']
model_last_week2 = sm.OLS(Y, X)
results2 = model_last_week2.fit()
print(results2.summary())

# model diagnostics