def test_ceres_poisson(self, close_figures):
    """Smoke-test CERES residual plots on a simulated Poisson GLM.

    Column 0 of the design is constant, column 1 enters the linear
    predictor with slope 1 and column 2 enters quadratically.  Columns 1
    and 2 are each plotted twice: once through the module-level
    ``plot_ceres_residuals`` function and once through the results
    method.  Every figure gets a lowess curve and a descriptive title
    before it is passed to ``close_or_save``.

    ``close_figures`` is not referenced in the body (presumably a pytest
    fixture that takes care of figure cleanup).
    """
    np.random.seed(3446)

    nobs, ncol = 100, 3
    exog = np.random.normal(size=(nobs, ncol))
    exog[:, 0] = 1
    mean = np.exp(4 + exog[:, 1] + 0.2*exog[:, 2]**2)
    endog = np.random.poisson(mean)

    results = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

    # Indexed by design column; entry 0 is never used by the loop below.
    effect_labels = ["Intercept", "Linear effect, slope=1",
                     "Quadratic effect"]

    for focus_col in (1, 2):
        for as_method in (False, True):
            title = "CERES plot"
            if as_method:
                fig = results.plot_ceres_residuals(focus_col)
                title += " (called as method)"
            else:
                fig = plot_ceres_residuals(results, focus_col)

            axis = fig.get_axes()[0]
            add_lowess(axis)
            # Shrink the axes so the three-line title fits above them.
            axis.set_position([0.1, 0.1, 0.8, 0.77])
            axis.set_title(title + "\nPoisson regression\n"
                           + effect_labels[focus_col])
            close_or_save(pdf, fig)
def test_ceres_poisson(self):
    """Run the CERES residual plot on simulated Poisson regression data.

    The plot is produced for design columns 1 (linear effect) and 2
    (quadratic effect), both via the ``plot_ceres_residuals`` function
    and via the method of the same name on the fitted results.
    """
    np.random.seed(3446)

    n_rows = 100
    n_cols = 3
    design = np.random.normal(size=(n_rows, n_cols))
    design[:, 0] = 1
    log_mean = 4 + design[:, 1] + 0.2 * design[:, 2]**2
    counts = np.random.poisson(np.exp(log_mean))

    fitted = sm.GLM(counts, design, family=sm.families.Poisson()).fit()

    labels = [
        "Intercept", "Linear effect, slope=1", "Quadratic effect"
    ]

    for focus_col in 1, 2:
        # Each entry pairs a way of producing the figure with the text
        # appended to the base title for that call style.
        variants = (
            (lambda: plot_ceres_residuals(fitted, focus_col), ""),
            (lambda: fitted.plot_ceres_residuals(focus_col),
             " (called as method)"),
        )
        for make_figure, suffix in variants:
            fig = make_figure()
            ax = fig.get_axes()[0]
            add_lowess(ax)
            ax.set_position([0.1, 0.1, 0.8, 0.77])
            heading = "CERES plot" + suffix
            ax.set_title(heading + "\nPoisson regression\n"
                         + labels[focus_col])
            close_or_save(pdf, fig)
def test_plot_oth(self, close_figures):
    """Check that the remaining regression plot helpers run.

    Only execution is exercised; nothing about the drawn output is
    asserted.  ``close_figures`` is not referenced in the body
    (presumably a pytest fixture responsible for figure cleanup).
    """
    fitted = self.res

    plot_fit(fitted, 0, y_true=None)
    plot_partregress_grid(fitted, exog_idx=[0, 1])
    plot_regress_exog(fitted, exog_idx=0)
    plot_ccpr(fitted, exog_idx=0)
    plot_ccpr_grid(fitted, exog_idx=[0])

    # Overlay a lowess smooth on every panel of the two-variable grid.
    grid_fig = plot_ccpr_grid(fitted, exog_idx=[0, 1])
    for panel in grid_fig.axes:
        add_lowess(panel)
    close_or_save(pdf, grid_fig)
def test_plot_oth(self, close_figures):
    # Smoke test only: each plotting helper is called once on the fitted
    # results stored on the test class, and no output is inspected.
    model_results = self.res
    first_col = 0
    both_cols = [0, 1]

    plot_fit(model_results, first_col, y_true=None)
    plot_partregress_grid(model_results, exog_idx=both_cols)
    plot_regress_exog(model_results, exog_idx=first_col)
    plot_ccpr(model_results, exog_idx=first_col)
    plot_ccpr_grid(model_results, exog_idx=[first_col])

    # The last grid additionally gets a lowess curve on each of its axes
    # before being handed to close_or_save.
    ccpr_fig = plot_ccpr_grid(model_results, exog_idx=[0, 1])
    for axes in ccpr_fig.axes:
        add_lowess(axes)
    close_or_save(pdf, ccpr_fig)
def test_added_variable_poisson(self, close_figures):
    """Smoke-test added variable plots on a simulated Poisson GLM.

    Column 0 has a linear effect (slope 1), column 1 a quadratic effect
    and column 2 no effect.  For every column, both settings of
    ``use_glm_weights`` and both residual types, the plot is created
    through the module-level function and through the results method.

    ``close_figures`` is called with no arguments after each figure.
    """
    np.random.seed(3446)

    nobs, nvar = 100, 3
    exog = np.random.normal(size=(nobs, nvar))
    mean = np.exp(4 + exog[:, 0] + 0.2 * exog[:, 1]**2)
    endog = np.random.poisson(mean)

    results = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

    effect_labels = [
        "Linear effect, slope=1", "Quadratic effect", "No effect"
    ]
    weight_labels = ["Unweighted", "Weighted"]

    for focus_col in (0, 1, 2):
        for use_glm_weights in (False, True):
            for resid_type in ("resid_deviance", "resid_response"):
                options = dict(use_glm_weights=use_glm_weights,
                               resid_type=resid_type)

                # Run directly and called as a results method.
                for via_method in (False, True):
                    if via_method:
                        fig = results.plot_added_variable(
                            focus_col, **options)
                        heading = "Added variable plot (called as method)"
                    else:
                        fig = plot_added_variable(
                            results, focus_col, **options)
                        heading = "Added variable plot"

                    ax = fig.get_axes()[0]
                    add_lowess(ax)
                    # Leave room above the axes for the multi-line title.
                    ax.set_position([0.1, 0.1, 0.8, 0.7])

                    title = "".join([
                        heading,
                        "\nPoisson regression\n",
                        effect_labels[focus_col] + "\n",
                        weight_labels[use_glm_weights] + "\n",
                        "Using '%s' residuals" % resid_type,
                    ])
                    ax.set_title(title)
                    close_or_save(pdf, fig)
                    # Called once per figure so the many figures created
                    # by the nested loops are not all kept at once.
                    close_figures()
def test_plot_oth(self):
    """Check that the remaining regression plot helpers run.

    This is a smoke test: each helper is called on ``self.res`` and no
    property of the resulting figures is asserted.  All figures are
    closed on exit, including when one of the plotting calls raises, so
    a failure here does not leave open figures behind for later tests.
    """
    res = self.res
    try:
        plot_fit(res, 0, y_true=None)
        plot_partregress_grid(res, exog_idx=[0, 1])
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=0)
        plot_ccpr_grid(res, exog_idx=[0])

        # Also exercise add_lowess on each panel of a two-variable grid.
        fig = plot_ccpr_grid(res, exog_idx=[0, 1])
        for ax in fig.axes:
            add_lowess(ax)
    finally:
        plt.close('all')
def test_plot_oth(self):
    """Smoke-test the plotting helpers not covered by other tests.

    Nothing about the figures is checked beyond the calls completing.
    The previously unused ``endog``/``exog`` locals have been dropped,
    and figure cleanup sits in a ``finally`` clause so figures are
    closed even if a plotting call fails part-way through.
    """
    res = self.res
    try:
        plot_fit(res, 0, y_true=None)
        plot_partregress_grid(res, exog_idx=[0, 1])
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=0)
        plot_ccpr_grid(res, exog_idx=[0])

        fig = plot_ccpr_grid(res, exog_idx=[0, 1])
        # Add a lowess curve to every axes of the grid figure.
        for ax in fig.axes:
            add_lowess(ax)
    finally:
        # Runs on success and on failure alike.
        plt.close('all')
def test_added_variable_poisson(self, close_figures):
    """Exercise added variable plots for a Poisson regression.

    Data are simulated so that design column 0 acts linearly (slope 1),
    column 1 quadratically and column 2 not at all.  Every combination
    of focus column, ``use_glm_weights`` and residual type is plotted
    both with ``plot_added_variable`` and with the results method of the
    same name.  ``close_figures`` is invoked after each figure.
    """
    np.random.seed(3446)

    n_rows = 100
    n_cols = 3
    design = np.random.normal(size=(n_rows, n_cols))
    log_mean = 4 + design[:, 0] + 0.2 * design[:, 1]**2
    counts = np.random.poisson(np.exp(log_mean))

    fitted = sm.GLM(counts, design, family=sm.families.Poisson()).fit()

    call_titles = ("Added variable plot",
                   "Added variable plot (called as method)")
    effects = ["Linear effect, slope=1", "Quadratic effect", "No effect"]

    for focus_col in range(3):
        effect_str = effects[focus_col]
        for use_glm_weights in False, True:
            weight_str = ["Unweighted", "Weighted"][use_glm_weights]
            for resid_type in "resid_deviance", "resid_response":
                # Index 0: module-level function; index 1: results method.
                for call_style, ti in enumerate(call_titles):
                    if call_style == 0:
                        fig = plot_added_variable(
                            fitted, focus_col,
                            use_glm_weights=use_glm_weights,
                            resid_type=resid_type)
                    else:
                        fig = fitted.plot_added_variable(
                            focus_col,
                            use_glm_weights=use_glm_weights,
                            resid_type=resid_type)

                    ax = fig.get_axes()[0]
                    add_lowess(ax)
                    ax.set_position([0.1, 0.1, 0.8, 0.7])

                    ti += "\nPoisson regression\n"
                    ti += effect_str + "\n"
                    ti += weight_str + "\n"
                    ti += "Using '%s' residuals" % resid_type
                    ax.set_title(ti)

                    close_or_save(pdf, fig)
                    # Release figures as we go rather than at the end.
                    close_figures()
def test_plots():
    """Smoke-test the diagnostic plot methods of a binomial GLM result.

    A logistic model is simulated and fit twice: once from arrays, where
    focus variables are addressed by column index, and once from a
    formula, where they are addressed by name.  For each focus variable
    the added variable, partial residual and CERES residual plots are
    drawn, given a lowess curve and passed to ``close_or_save``.
    """
    import pandas as pd
    from statsmodels.graphics.regressionplots import add_lowess

    np.random.seed(378)

    n = 200
    exog = np.random.normal(size=(n, 2))
    lin_pred = exog[:, 0] + exog[:, 1]**2
    prob = 1 / (1 + np.exp(-lin_pred))
    endog = 1 * (np.random.uniform(size=n) < prob)

    plot_methods = (
        "plot_added_variable",
        "plot_partial_residuals",
        "plot_ceres_residuals",
    )

    def check_plots(result, focus_vars):
        # Draw every diagnostic plot for every focus variable, in the
        # same order as the original hand-unrolled sequence.
        for focus in focus_vars:
            for method_name in plot_methods:
                fig = getattr(result, method_name)(focus)
                add_lowess(fig.axes[0], frac=0.5)
                close_or_save(pdf, fig)

    # array interface
    model = sm.GLM(endog, exog, family=sm.families.Binomial())
    check_plots(model.fit(), (0, 1))

    # formula interface
    data = pd.DataFrame({"y": endog, "x1": exog[:, 0], "x2": exog[:, 1]})
    model = sm.GLM.from_formula("y ~ x1 + x2", data,
                                family=sm.families.Binomial())
    check_plots(model.fit(), ("x1", "x2"))
# rise slightly faster for people between the ages of 20 and 35, and # again for people between the ages of 50 and 60, with a period of # minimal increase between these age intervals. This would contradict # the perfectly linear model for age (on the log odds scale) that we # have specified in our model. These plotting techniques can be useful # at identifying possible opportunities for future analysis with # additional data, but do not identify features that can be claimed with # high confidence using the present data. # + fig = result.plot_partial_residuals("RIDAGEYR") ax = fig.get_axes()[0] ax.lines[0].set_alpha(0.2) from statsmodels.graphics.regressionplots import add_lowess _ = add_lowess(ax) fig = result.plot_added_variable("RIDAGEYR") ax = fig.get_axes()[0] ax.lines[0].set_alpha(0.2) _ = add_lowess(ax) fig = result.plot_ceres_residuals("RIDAGEYR") ax = fig.get_axes()[0] ax.lines[0].set_alpha(0.2) _ = add_lowess(ax) # - ## Poisson regression # Poisson regression is a type of GLM based on the Poisson distribution.
def test_plots():
    """Check that the GLM result plotting methods run for both interfaces.

    Binary responses are simulated from a logistic model with a linear
    effect of the first covariate and a quadratic effect of the second.
    The model is fit from arrays (focus variables given as column
    indices) and from a formula (focus variables given as names); for
    each focus variable the added variable, partial residual and CERES
    residual plots are produced with a lowess curve added.
    """
    import pandas as pd
    from statsmodels.graphics.regressionplots import add_lowess

    np.random.seed(378)

    n = 200
    exog = np.random.normal(size=(n, 2))
    lin_pred = exog[:, 0] + exog[:, 1]**2
    prob = 1 / (1 + np.exp(-lin_pred))
    endog = 1 * (np.random.uniform(size=n) < prob)

    # Result methods under test, in the order they are called.
    method_names = ["plot_added_variable", "plot_partial_residuals",
                    "plot_ceres_residuals"]

    # array interface
    model = sm.GLM(endog, exog, family=sm.families.Binomial())
    result = model.fit()
    for j in 0, 1:
        for method_name in method_names:
            fig = getattr(result, method_name)(j)
            add_lowess(fig.axes[0], frac=0.5)
            close_or_save(pdf, fig)

    # formula interface
    data = pd.DataFrame({"y": endog, "x1": exog[:, 0], "x2": exog[:, 1]})
    model = sm.GLM.from_formula("y ~ x1 + x2", data,
                                family=sm.families.Binomial())
    result = model.fit()
    for xname in "x1", "x2":
        for method_name in method_names:
            fig = getattr(result, method_name)(xname)
            add_lowess(fig.axes[0], frac=0.5)
            close_or_save(pdf, fig)
# including main effects for age, BMI, and gender. Note that we are # using the GLM function here to fit the model. GLM is a more general # class of regression procedures that includes linear regression (OLS) # as a special case. OLS is the default for GLM, so the code below fits # the identical model as fit above with the OLS function. Currently, it # is necessary to use GLM when fitting linear models if we want to # produce added variable plots. from statsmodels.graphics.regressionplots import add_lowess model = sm.GLM.from_formula("BPXSY1 ~ RIDAGEYR + BMXBMI + RIAGENDRx", data=da) result = model.fit() fig = result.plot_added_variable("RIDAGEYR") fig.axes[0].lines[0].set_alpha(0.2) _ = add_lowess(fig.axes[0], frac=0.5) # The plot above suggests that the increasing trend identified by the # linear model is approximately correct, but that the true relationship # may be slightly nonlinear, with positive curvature. This means that # the annual mean increment in blood pressure may get larger as people # age. # We won't get into the mathematical details of how added variable plots # are constructed, but we note that the construction involves # residualization. Therefore, the "focus variable" (age above) is # centered relative to its mean in the plot. # As another example, below is the added variable plot for BMI: from statsmodels.graphics.regressionplots import add_lowess
# Make plot of the fitted neural net function against each covariate, # holding the other covariate fixed at -1, 0, 1, SD relative to the mean. for k, na in enumerate(xp.columns): plt.clf() ax = plt.axes() plt.grid(True) for b in -1, 0, 1: xx = xp.copy().values for j in range(xx.shape[1]): if j != k: xx[:, j] = xx[:, j].mean() + b else: xx[:, j] = np.linspace(xx[:, j].min(), xx[:, j].max(), xx.shape[0]) yy = net(torch.tensor(xx.astype(np.float32))) yy = yy.detach().numpy() plt.plot(xx[:, k], yy, '-', lw=4) plt.xlabel(na, size=16) plt.ylabel("Predicted SBP-Z", size=16) pdf.savefig() # Make added variable plots for each covariate from statsmodels.graphics.regressionplots import add_lowess for k, na in enumerate(xp.columns): plt.clf() ax = plt.axes(rasterized=True) plt.grid(True) result.plot_added_variable(na, ax=ax) add_lowess(ax) pdf.savefig() pdf.close()