def test_s_fmt_lineoptions(self, close_figures):
    """Call ``qqline`` with line ``"s"``, ``fmt`` and extra line options.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_s`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(
        self.ax,
        "s",
        x=self.x,
        y=self.y,
        fmt=self.fmt,
        **self.lineoptions,
    )
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_q_fmt_lineoptions(self, close_figures):
    """Call ``qqline`` with line ``"q"``, a distribution, ``fmt`` and line options.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_q`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(
        self.ax,
        "q",
        dist=stats.norm,
        x=self.x,
        y=self.y,
        fmt=self.fmt,
        **self.lineoptions,
    )
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_q_fmt(self, close_figures):
    """Call ``qqline`` with line ``"q"``, a distribution and ``fmt``.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_q`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(
        self.ax,
        "q",
        dist=stats.norm,
        x=self.x,
        y=self.y,
        fmt=self.fmt,
    )
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_q(self, close_figures):
    """Line ``"q"`` with ``dist=stats.norm`` must add a child to the axes."""
    children_before = len(self.ax.get_children())

    qqline(self.ax, "q", dist=stats.norm, x=self.x, y=self.y)

    children_after = len(self.ax.get_children())
    assert children_after > children_before
def test_s(self, close_figures):
    """Line ``"s"`` with x and y supplied must add a child to the axes."""
    children_before = len(self.ax.get_children())

    qqline(self.ax, "s", x=self.x, y=self.y)

    children_after = len(self.ax.get_children())
    assert children_after > children_before
def test_s_fmt(self, close_figures):
    """Call ``qqline`` with line ``"s"`` and an explicit ``fmt``.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_s`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(self.ax, "s", x=self.x, y=self.y, fmt=self.fmt)
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_45_fmt(self, close_figures):
    """Call ``qqline`` with line ``"45"`` and an explicit ``fmt``.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_45`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(self.ax, "45", fmt=self.fmt)
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_45_fmt_lineoptions(self, close_figures):
    """Call ``qqline`` with line ``"45"``, ``fmt`` and extra line options.

    Besides checking that the call does not raise, verify that the axes
    gained at least one child artist, the same check ``test_45`` performs.
    """
    n_before = len(self.ax.get_children())
    qqline(self.ax, "45", fmt=self.fmt, **self.lineoptions)
    # A silent no-op would otherwise pass unnoticed.
    assert len(self.ax.get_children()) > n_before
def test_non45_no_x_no_y(self, close_figures):
    """Line ``"s"`` without x and y must raise ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        qqline(self.ax, "s")
def test_45(self, close_figures):
    """Line ``"45"`` must add a child to the axes."""
    children_before = len(self.ax.get_children())

    qqline(self.ax, "45")

    children_after = len(self.ax.get_children())
    assert children_after > children_before
def test_badline(self):
    """An unrecognised line specifier (``"junk"``) must raise ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        qqline(self.ax, "junk")
# Build a regression model for 'stay' versus 't'
model1 = smf.ols('stay ~ t', data=brexit).fit()

# Examine the model output.
# The summaries are printed explicitly: a bare ``model1.summary()`` expression
# shows nothing when this file is run as a script.
print(model1.summary())
print(model1.summary2())

# Produce the following diagnostic plots:
# * Predicted versus observed
#   (x/y are passed by keyword; newer seaborn releases no longer accept them
#   positionally.)
sns.jointplot(x=brexit.stay, y=model1.fittedvalues)

# * Residuals versus predicted
sns.jointplot(x=model1.fittedvalues, y=model1.resid)

# * Residuals versus 't'
# NOTE(review): ``join`` is deprecated in recent seaborn in favour of
# ``linestyle="none"`` -- kept here for compatibility with older releases.
sns.pointplot(x=brexit.t, y=model1.resid, join=False)

# * Autocorrelation plot
autocorrelation_plot(model1.resid)

# * Normal Q-Q plot for (Studentised) residuals
st_resid = model1.get_influence().get_resid_studentized_external()
qq = smg.qqplot(st_resid)
smg.qqline(qq.gca(), '45')

# BONUS: Build a second regression model for 'stay' versus 't' and 'pollster',
# and re-run all of the above
model2 = smf.ols('stay ~ t + pollster', data=brexit).fit()
"""
Import the food expenditure dataset.  Plot annual food expenditure on x-axis
and household income on y-axis.  Use qqline to add regression line into the
plot.
"""
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqline

# Load the dataset and pull out the two variables to plot.
foodexp = sm.datasets.engel.load()
x, y = foodexp.exog, foodexp.endog

# Scatter the raw data on a single axes and label it from the dataset metadata.
ax = plt.subplot(111)
ax.scatter(x, y)
ax.set(xlabel=foodexp.exog_name[0], ylabel=foodexp.endog_name)

# Overlay the 'r' line on the same axes and display the figure.
qqline(ax, 'r', x, y)
plt.show()
plt.boxplot(fat)
plt.show()

# (c)
plt.scatter(age, fat, c="green", alpha=0.5)
plt.show()

# Probability plots of both variables against the normal distribution;
# both are drawn through pylab before the single show() call.
stats.probplot(age, dist="norm", plot=pylab)
stats.probplot(fat, dist="norm", plot=pylab)
pylab.show()

# Scatter of fat against age with the qqline 'r' line overlaid.
ax = plt.subplot(111)
ax.scatter(age, fat)
qqline(ax, 'r', age, fat)
plt.show()

#########################################################
# 4.
# (a)
def cos_sim(A, B):
    """Return ``dot(A, B)`` divided by the product of ``norm(A)`` and ``norm(B)``."""
    scale = norm(A) * norm(B)
    return dot(A, B) / scale


print(cos_sim((1.5, 1.7), (1.4, 1.6)))
print(cos_sim((2.0, 1.9), (1.4, 1.6)))
print(cos_sim((1.6, 1.8), (1.4, 1.6)))
"""
Import the food expenditure dataset.  Plot annual food expenditure on x-axis
and household income on y-axis.  Use qqline to add regression line into the
plot.
"""
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqline

# ``load()`` is called without ``as_pandas``: that keyword was deprecated and
# later removed from the statsmodels dataset loaders, so passing it raises a
# TypeError on current releases.
foodexp = sm.datasets.engel.load()
x = foodexp.exog
y = foodexp.endog

# Scatter the raw data and label the axes from the dataset metadata.
ax = plt.subplot(111)
ax.scatter(x, y)
ax.set_xlabel(foodexp.exog_name[0])
ax.set_ylabel(foodexp.endog_name)

# Overlay the 'r' (regression) line on the same axes and display the figure.
qqline(ax, 'r', x, y)
plt.show()