    def test_ceres_poisson(self, close_figures):

        np.random.seed(3446)

        n = 100
        p = 3
        exog = np.random.normal(size=(n, p))
        exog[:, 0] = 1
        lin_pred = 4 + exog[:, 1] + 0.2*exog[:, 2]**2
        expval = np.exp(lin_pred)
        endog = np.random.poisson(expval)

        model = sm.GLM(endog, exog, family=sm.families.Poisson())
        results = model.fit()

        for focus_col in 1, 2:
            for j in 0, 1:
                if j == 0:
                    fig = plot_ceres_residuals(results, focus_col)
                else:
                    fig = results.plot_ceres_residuals(focus_col)
                ax = fig.get_axes()[0]
                add_lowess(ax)
                ax.set_position([0.1, 0.1, 0.8, 0.77])
                effect_str = ["Intercept", "Linear effect, slope=1",
                              "Quadratic effect"][focus_col]
                ti = "CERES plot"
                if j == 1:
                    ti += " (called as method)"
                ax.set_title(ti + "\nPoisson regression\n" +
                             effect_str)
                close_or_save(pdf, fig)
    def test_plot_oth(self, close_figures):
        # just test that they run
        res = self.res
        plot_fit(res, 0, y_true=None)
        plot_partregress_grid(res, exog_idx=[0, 1])
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=0)
        plot_ccpr_grid(res, exog_idx=[0])
        fig = plot_ccpr_grid(res, exog_idx=[0, 1])
        for ax in fig.axes:
            add_lowess(ax)

        close_or_save(pdf, fig)
    def test_added_variable_poisson(self, close_figures):

        np.random.seed(3446)

        n = 100
        p = 3
        exog = np.random.normal(size=(n, p))
        lin_pred = 4 + exog[:, 0] + 0.2 * exog[:, 1]**2
        expval = np.exp(lin_pred)
        endog = np.random.poisson(expval)

        model = sm.GLM(endog, exog, family=sm.families.Poisson())
        results = model.fit()

        for focus_col in 0, 1, 2:
            for use_glm_weights in False, True:
                for resid_type in "resid_deviance", "resid_response":
                    weight_str = ["Unweighted", "Weighted"][use_glm_weights]

                    # Run directly and called as a results method.
                    for j in 0, 1:

                        if j == 0:
                            fig = plot_added_variable(
                                results,
                                focus_col,
                                use_glm_weights=use_glm_weights,
                                resid_type=resid_type)
                            ti = "Added variable plot"
                        else:
                            fig = results.plot_added_variable(
                                focus_col,
                                use_glm_weights=use_glm_weights,
                                resid_type=resid_type)
                            ti = "Added variable plot (called as method)"
                        ax = fig.get_axes()[0]

                        add_lowess(ax)
                        ax.set_position([0.1, 0.1, 0.8, 0.7])
                        effect_str = [
                            "Linear effect, slope=1", "Quadratic effect",
                            "No effect"
                        ][focus_col]
                        ti += "\nPoisson regression\n"
                        ti += effect_str + "\n"
                        ti += weight_str + "\n"
                        ti += "Using '%s' residuals" % resid_type
                        ax.set_title(ti)
                        close_or_save(pdf, fig)
                        close_figures()
    def test_plot_oth(self):
        # just test that they run
        res = self.res

        plot_fit(res, 0, y_true=None)
        plot_partregress_grid(res, exog_idx=[0, 1])
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=0)
        plot_ccpr_grid(res, exog_idx=[0])
        fig = plot_ccpr_grid(res, exog_idx=[0, 1])
        for ax in fig.axes:
            add_lowess(ax)

        plt.close('all')
Example #9
def test_plots():

    np.random.seed(378)
    n = 200
    exog = np.random.normal(size=(n, 2))
    lin_pred = exog[:, 0] + exog[:, 1]**2
    prob = 1 / (1 + np.exp(-lin_pred))
    endog = 1 * (np.random.uniform(size=n) < prob)

    model = sm.GLM(endog, exog, family=sm.families.Binomial())
    result = model.fit()

    import pandas as pd
    from statsmodels.graphics.regressionplots import add_lowess

    # array interface
    for j in 0, 1:
        fig = result.plot_added_variable(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_partial_residuals(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_ceres_residuals(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)

    # formula interface
    data = pd.DataFrame({"y": endog, "x1": exog[:, 0], "x2": exog[:, 1]})
    model = sm.GLM.from_formula("y ~ x1 + x2",
                                data,
                                family=sm.families.Binomial())
    result = model.fit()
    for j in 0, 1:
        xname = ["x1", "x2"][j]
        fig = result.plot_added_variable(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_partial_residuals(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_ceres_residuals(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
Example #10
# rise slightly faster for people between the ages of 20 and 35, and
# again for people between the ages of 50 and 60, with a period of
# minimal increase between these age intervals.  This would contradict
# the perfectly linear model for age (on the log odds scale) that we
# have specified in our model.  These plotting techniques can be useful
# at identifying possible opportunities for future analysis with
# additional data, but do not identify features that can be claimed with
# high confidence using the present data.

# +
fig = result.plot_partial_residuals("RIDAGEYR")
ax = fig.get_axes()[0]
ax.lines[0].set_alpha(0.2)

from statsmodels.graphics.regressionplots import add_lowess
_ = add_lowess(ax)

fig = result.plot_added_variable("RIDAGEYR")
ax = fig.get_axes()[0]
ax.lines[0].set_alpha(0.2)
_ = add_lowess(ax)

fig = result.plot_ceres_residuals("RIDAGEYR")
ax = fig.get_axes()[0]
ax.lines[0].set_alpha(0.2)
_ = add_lowess(ax)
# -

## Poisson regression

# Poisson regression is a type of GLM based on the Poisson distribution.
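
# As a minimal, self-contained sketch (not part of the original case
# study), a Poisson GLM for simulated count data can be fit like this:
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = rng.normal(size=(500, 2))
counts = rng.poisson(np.exp(0.5 + x[:, 0]))  # mean depends only on the first covariate
pois_fit = sm.GLM(counts, sm.add_constant(x), family=sm.families.Poisson()).fit()
print(pois_fit.params)  # close to the true values (0.5, 1.0, 0.0)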
Example #12
# including main effects for age, BMI, and gender.  Note that we are
# using the GLM function here to fit the model.  GLM is a more general
# class of regression procedures that includes linear regression (OLS)
# as a special case.  OLS is the default for GLM, so the code below fits
# the identical model as fit above with the OLS function.  Currently, it
# is necessary to use GLM when fitting linear models if we want to
# produce added variable plots.
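
# A quick check of this equivalence (a sketch; assumes the `da` DataFrame
# and `numpy as np`, both used elsewhere in this case study):
ols_params = sm.OLS.from_formula("BPXSY1 ~ RIDAGEYR + BMXBMI + RIAGENDRx", data=da).fit().params
glm_params = sm.GLM.from_formula("BPXSY1 ~ RIDAGEYR + BMXBMI + RIAGENDRx", data=da).fit().params
print(np.allclose(ols_params, glm_params))  # True: the Gaussian family is the GLM default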

from statsmodels.graphics.regressionplots import add_lowess

model = sm.GLM.from_formula("BPXSY1 ~ RIDAGEYR + BMXBMI + RIAGENDRx", data=da)
result = model.fit()

fig = result.plot_added_variable("RIDAGEYR")
fig.axes[0].lines[0].set_alpha(0.2)
_ = add_lowess(fig.axes[0], frac=0.5)

# The plot above suggests that the increasing trend identified by the
# linear model is approximately correct, but that the true relationship
# may be slightly nonlinear, with positive curvature.  This means that
# the annual mean increment in blood pressure may get larger as people
# age.

# We won't get into the mathematical details of how added variable plots
# are constructed, but we note that the construction involves
# residualization.  Therefore, the "focus variable" (age above) is
# centered relative to its mean in the plot.
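
# For intuition, the residualization behind an added variable plot can be
# sketched for a linear model as follows (an illustration, not the exact
# statsmodels implementation; assumes the same `da` DataFrame as above):
age_resid = sm.OLS.from_formula("RIDAGEYR ~ BMXBMI + RIAGENDRx", data=da).fit().resid
sbp_resid = sm.OLS.from_formula("BPXSY1 ~ BMXBMI + RIAGENDRx", data=da).fit().resid
# The plot is essentially sbp_resid against age_resid; age_resid has mean
# (numerically) zero, which is why the focus variable appears centered.
print(age_resid.mean())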

# As another example, below is the added variable plot for BMI:

from statsmodels.graphics.regressionplots import add_lowess
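
# The original cell is truncated here; a sketch of the BMI added variable
# plot, mirroring the age example above ("BMXBMI" is the BMI variable in
# the model formula):
fig = result.plot_added_variable("BMXBMI")
fig.axes[0].lines[0].set_alpha(0.2)
_ = add_lowess(fig.axes[0], frac=0.5)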
Example #13
# Make a plot of the fitted neural net function against each covariate,
# holding the other covariates fixed at -1, 0, and 1 SD relative to the mean.
for k, na in enumerate(xp.columns):
    plt.clf()
    ax = plt.axes()
    plt.grid(True)
    for b in -1, 0, 1:
        xx = xp.copy().values
        for j in range(xx.shape[1]):
            if j != k:
                xx[:, j] = xx[:, j].mean() + b
            else:
                xx[:, j] = np.linspace(xx[:, j].min(), xx[:, j].max(), xx.shape[0])
        yy = net(torch.tensor(xx.astype(np.float32)))
        yy = yy.detach().numpy()
        plt.plot(xx[:, k], yy, '-', lw=4)
    plt.xlabel(na, size=16)
    plt.ylabel("Predicted SBP-Z", size=16)
    pdf.savefig()

# Make added variable plots for each covariate
from statsmodels.graphics.regressionplots import add_lowess
for k, na in enumerate(xp.columns):
    plt.clf()
    ax = plt.axes(rasterized=True)
    plt.grid(True)
    result.plot_added_variable(na, ax=ax)
    add_lowess(ax)
    pdf.savefig()

pdf.close()