Ejemplos de predict_functional en Python, ejemplos de statsmodels.sandbox.predict_functional.predict_functional en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: kasunsp/pinalpha_mvp

    def test_formula(self):
        """Plot OLS formula-model predictions for two levels of a factor.

        An OLS model with a B-spline in ``x2``, a categorical ``x4`` and
        their interaction is fitted to simulated data.  ``predict_functional``
        then traces the fitted values against the focus variable ``x2`` with
        ``x4`` fixed at "B" and at "C".  Two figures are produced: the fitted
        curves alone, and the curves with their confidence bands.  The test
        makes no numeric assertions; each figure is handed to
        ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        # Categorical covariate with levels "A".."E".
        x4 = np.random.randint(0, 5, size=n)
        x4 = np.asarray(["ABCDE"[i] for i in x4])
        x5 = np.random.normal(size=n)
        # The mean is quadratic in x2, with a level shift and a steeper
        # curve when x4 == "B".
        is_b = x4 == "B"
        y = (0.3 * x2**2 + is_b + 0.1 * is_b * x2**2 + x5 +
             np.random.normal(size=n))

        df = pd.DataFrame({
            "y": y,
            "x1": x1,
            "x2": x2,
            "x3": x3,
            "x4": x4,
            "x5": x5
        })

        fml = "y ~ x1 + bs(x2, df=4) + x3 + x2*x3 + I(x1**2) + C(x4) + C(x4)*bs(x2, df=4) + x5"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # Non-focus covariates are collapsed to one value each: x1 and x5 via
        # np.mean, x3 via pctl(0.75) (helper defined outside this method;
        # presumably the 75th percentile -- confirm).
        summaries = {"x1": np.mean, "x3": pctl(0.75), "x5": np.mean}

        values = {"x4": "B"}
        pr1, ci1, fvals1 = predict_functional(result, "x2", summaries, values)

        values = {"x4": "C"}
        pr2, ci2, fvals2 = predict_functional(result, "x2", summaries, values)

        # Figure 1: fitted curves only.
        plt.clf()
        fig = plt.figure()
        # Leave room on the right of the axes for the figure-level legend.
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        ha, lb = ax.get_legend_handles_labels()
        # `loc` is passed by keyword: Matplotlib 3.6 deprecated the
        # positional form and later releases reject it.
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        # Figure 2: fitted curves with their confidence bands (the two
        # columns of ci1 / ci2 are used as the band edges).
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_formula(self):
        """Plot OLS formula-model predictions for two levels of a factor.

        Simulated data are fitted with an OLS formula containing a B-spline
        in ``x2``, a categorical ``x4`` and their interaction.  The fitted
        values are traced against the focus variable ``x2`` with ``x4`` held
        at "B" and at "C", first as bare curves and then with confidence
        bands.  No numeric assertions are made; each figure is passed to
        ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        # Categorical covariate with levels "A".."E".
        x4 = np.random.randint(0, 5, size=n)
        x4 = np.asarray(["ABCDE"[i] for i in x4])
        x5 = np.random.normal(size=n)
        # Level "B" gets both a shift and a steeper quadratic in x2.
        y = 0.3*x2**2 + (x4 == "B") + 0.1*(x4 == "B")*x2**2 + x5 + np.random.normal(size=n)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3, "x4": x4, "x5": x5})

        fml = "y ~ x1 + bs(x2, df=4) + x3 + x2*x3 + I(x1**2) + C(x4) + C(x4)*bs(x2, df=4) + x5"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # How each non-focus covariate is reduced to a single value.  pctl is
        # defined outside this method (presumably a percentile factory --
        # confirm).
        summaries = {"x1": np.mean, "x3": pctl(0.75), "x5": np.mean}

        values = {"x4": "B"}
        pr1, ci1, fvals1 = predict_functional(result, "x2", summaries, values)

        values = {"x4": "C"}
        pr2, ci2, fvals2 = predict_functional(result, "x2", summaries, values)

        # Figure 1: fitted curves only.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        ha, lb = ax.get_legend_handles_labels()
        # Keyword `loc`: the positional form was deprecated in Matplotlib 3.6
        # and is rejected by later releases.
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        # Figure 2: the same curves with their confidence bands shaded.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: locolucco209/MongoScraper

    def test_lm_contrast(self):
        """Plot an OLS contrast with a Scheffe band against the true contrast.

        The simulated response is
        ``y = x1 + 2*x2 + x3 - x1*x2 + x2*x3 + noise``.  The contrast between
        the fit at (x2=1, x3=1) and the fit at (x2=0, x3=0) is traced against
        the focus variable ``x1`` and drawn together with the population
        contrast ``4 - x1``.  No numeric assertions are made; the figure is
        passed to ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 + 2*x2 + x3 - x1*x2 + x2*x3 + np.random.normal(size=n)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2 + x2*x3"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # Population means at the two covariate settings; their difference,
        # 4 - x1, is the "Truth" line plotted below.
        values = {"x2": 1, "x3": 1} # y = 4
        values2 = {"x2": 0, "x3": 0} # y = x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='scheffe')

        plt.clf()
        fig = plt.figure()
        # Leave room on the right of the axes for the figure-level legend.
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 4 - fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        # Keyword `loc`: the positional form was deprecated in Matplotlib 3.6
        # and is rejected by later releases.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Mean contrast", size=15)
        plt.title("Linear model contrast")
        self.close_or_save(fig)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: locolucco209/MongoScraper

    def test_glm_formula_contrast(self):
        """Plot a Poisson GLM contrast with a simultaneous confidence band.

        Counts are simulated with log-mean
        ``5 + 0.1*x1 + 0.1*x2 + 0.1*x3 - 0.1*x1*x2``.  The contrast between
        the fit at (x2=1, x3=1) and the fit at (x2=0, x3=0) is traced against
        the focus variable ``x1`` and drawn next to the population contrast
        ``0.2 - 0.1*x1`` on the linear-predictor scale.  No numeric
        assertions are made; the figure is passed to ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 50
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        # Linear predictor (log of the Poisson mean).
        mn = 5 + 0.1*x1 + 0.1*x2 + 0.1*x3 - 0.1*x1*x2
        y = np.random.poisson(np.exp(mn), size=len(mn))

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2"
        model = sm.GLM.from_formula(fml, data=df, family=sm.families.Poisson())
        result = model.fit()

        # Linear predictors at the two settings; their difference,
        # 0.2 - 0.1*x1, is the "Truth" line plotted below.
        values = {"x2": 1, "x3": 1} # y = 5.2
        values2 = {"x2": 0, "x3": 0} # y = 5 + 0.1*x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='simultaneous')

        plt.clf()
        fig = plt.figure()
        # Leave room on the right of the axes for the figure-level legend.
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 0.2 - 0.1*fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        # Keyword `loc`: the positional form was deprecated in Matplotlib 3.6
        # and is rejected by later releases.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Linear predictor contrast", size=15)
        plt.title("Poisson regression contrast")
        self.close_or_save(fig)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_glm_formula_contrast(self):
        """Plot a Poisson GLM contrast with a simultaneous confidence band.

        Poisson counts are drawn with log-mean
        ``5 + 0.1*x1 + 0.1*x2 + 0.1*x3 - 0.1*x1*x2``.  The estimated contrast
        between (x2=1, x3=1) and (x2=0, x3=0), as a function of the focus
        variable ``x1``, is plotted with its band and with the population
        contrast ``0.2 - 0.1*x1``.  The test has no numeric assertions; the
        figure goes to ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 50
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        # Linear predictor (log of the Poisson mean).
        mn = 5 + 0.1*x1 + 0.1*x2 + 0.1*x3 - 0.1*x1*x2
        y = np.random.poisson(np.exp(mn), size=len(mn))

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2"
        model = sm.GLM.from_formula(fml, data=df, family=sm.families.Poisson())
        result = model.fit()

        # The trailing comments give the population linear predictor at each
        # setting; the plotted "Truth" is their difference.
        values = {"x2": 1, "x3": 1} # y = 5.2
        values2 = {"x2": 0, "x3": 0} # y = 5 + 0.1*x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='simultaneous')

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 0.2 - 0.1*fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        # Pass `loc` by keyword; Matplotlib 3.6 deprecated passing it
        # positionally and later releases reject it.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Linear predictor contrast", size=15)
        plt.title("Poisson regression contrast")
        self.close_or_save(fig)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_lm_contrast(self):
        """Plot an OLS contrast with a Scheffe band against the true contrast.

        Data are simulated from
        ``y = x1 + 2*x2 + x3 - x1*x2 + x2*x3 + noise``.  The estimated
        contrast between (x2=1, x3=1) and (x2=0, x3=0), as a function of the
        focus variable ``x1``, is plotted with its band and with the
        population contrast ``4 - x1``.  The test has no numeric assertions;
        the figure goes to ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 + 2*x2 + x3 - x1*x2 + x2*x3 + np.random.normal(size=n)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2 + x2*x3"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # The trailing comments give the population mean at each setting;
        # the plotted "Truth" is their difference.
        values = {"x2": 1, "x3": 1} # y = 4
        values2 = {"x2": 0, "x3": 0} # y = x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='scheffe')

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 4 - fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        # Pass `loc` by keyword; Matplotlib 3.6 deprecated passing it
        # positionally and later releases reject it.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Mean contrast", size=15)
        plt.title("Linear model contrast")
        self.close_or_save(fig)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: locolucco209/MongoScraper

    def test_noformula_prediction(self):
        """Plot OLS predictions for a model built from arrays, not a formula.

        An OLS model is fitted to a plain ndarray design matrix with three
        columns, and ``predict_functional`` traces the fit against the focus
        variable "x1" with "x2" fixed at 1 and at -1.  Two figures are
        produced: the fitted lines alone, and the lines with their confidence
        bands.  No numeric assertions are made; each figure is passed to
        ``self.close_or_save``.
        """

        np.random.seed(6434)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 - x2 + np.random.normal(size=n)

        # Columns are referred to below as "x1", "x2", "x3" (presumably the
        # names statsmodels assigns to unnamed array columns -- confirm).
        exog = np.vstack((x1, x2, x3)).T

        model = sm.OLS(y, exog)
        result = model.fit()

        # pctl is defined outside this method (presumably a percentile
        # factory -- confirm).
        summaries = {"x3": pctl(0.75)}
        values = {"x2": 1}
        pr1, ci1, fvals1 = predict_functional(result, "x1", summaries, values)

        values = {"x2": -1}
        pr2, ci2, fvals2 = predict_functional(result, "x1", summaries, values)

        # Figure 1: fitted lines only.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        ha, lb = ax.get_legend_handles_labels()
        # Keyword `loc`: the positional form was deprecated in Matplotlib 3.6
        # and is rejected by later releases.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        # Figure 2: fitted lines with their confidence bands.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        # The second curve belongs to the x2=-1 fit, so it is drawn against
        # its own grid (fvals2) and labelled accordingly.
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_noformula_prediction(self):
        """Plot OLS predictions for a model built from arrays, not a formula.

        The design matrix is a bare ndarray of three simulated columns.  The
        fit is traced against the focus variable "x1" with "x2" held at 1 and
        at -1, first as bare lines and then with confidence bands.  The test
        has no numeric assertions; each figure goes to
        ``self.close_or_save``.
        """

        np.random.seed(6434)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 - x2 + np.random.normal(size=n)

        # The columns are addressed below by the names "x1", "x2", "x3"
        # (presumably statsmodels' default names for array input -- confirm).
        exog = np.vstack((x1, x2, x3)).T

        model = sm.OLS(y, exog)
        result = model.fit()

        # pctl comes from outside this method (presumably a percentile
        # factory -- confirm).
        summaries = {"x3": pctl(0.75)}
        values = {"x2": 1}
        pr1, ci1, fvals1 = predict_functional(result, "x1", summaries, values)

        values = {"x2": -1}
        pr2, ci2, fvals2 = predict_functional(result, "x1", summaries, values)

        # Figure 1: fitted lines only.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        ha, lb = ax.get_legend_handles_labels()
        # Pass `loc` by keyword; Matplotlib 3.6 deprecated passing it
        # positionally and later releases reject it.
        leg = plt.figlegend(ha, lb, loc="center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        # Figure 2: fitted lines with their confidence bands.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        # Use the x2=-1 grid and label for the second curve so that it
        # matches its band and the legend of the first figure.
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, loc="center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_glm_formula(self):
        """Plot binomial GLM predictions against the data-generating curve.

        Binary responses are simulated with log odds
        ``-1 + 0.5*x1**2 + (x3 == "B")`` and fitted with a binomial GLM
        through the formula interface.  For ``linear`` False and then True,
        the fit is traced against the focus variable ``x1`` with ``x3`` fixed
        at "B" and at "C", and two figures are drawn: fitted versus exact
        curves, and fitted curves with confidence bands.  No numeric
        assertions are made; each figure is passed to
        ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        # Categorical covariate with levels "A", "B", "C".
        x3 = np.random.randint(0, 3, size=n)
        x3 = np.asarray(["ABC"[i] for i in x3])
        lin_pred = -1 + 0.5*x1**2 + (x3 == "B")
        prob = 1 / (1 + np.exp(-lin_pred))
        y = 1 * (np.random.uniform(size=n) < prob)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + I(x1**2) + x2 + C(x3)"
        model = sm.GLM.from_formula(fml, family=sm.families.Binomial(), data=df)
        result = model.fit()
        summaries = {"x2": np.mean}

        for linear in False, True:

            values = {"x3": "B"}
            pr1, ci1, fvals1 = predict_functional(result, "x1", summaries, values, linear=linear)

            values = {"x3": "C"}
            pr2, ci2, fvals2 = predict_functional(result, "x1", summaries, values, linear=linear)

            # Population log odds on each grid; the trailing "+ 1" is the
            # x3 == "B" effect.
            exact1 = -1 + 0.5*fvals1**2 + 1
            exact2 = -1 + 0.5*fvals2**2

            if not linear:
                # Map the log odds to probabilities for this pass.
                exact1 = 1 / (1 + np.exp(-exact1))
                exact2 = 1 / (1 + np.exp(-exact2))

            # Figure 1: fitted versus exact curves.
            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B')
            plt.plot(fvals2, pr2, '-', label='x3=C')
            plt.plot(fvals1, exact1, '-', label='x3=B (exact)')
            plt.plot(fvals2, exact2, '-', label='x3=C (exact)')
            ha, lb = ax.get_legend_handles_labels()
            # Keyword `loc`: the positional form was deprecated in
            # Matplotlib 3.6 and is rejected by later releases.
            plt.figlegend(ha, lb, loc="center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

            # Figure 2: fitted curves with their confidence bands.
            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B', color='orange')
            plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
            plt.plot(fvals2, pr2, '-', label='x3=C', color='lime')
            plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
            ha, lb = ax.get_legend_handles_labels()
            plt.figlegend(ha, lb, loc="center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: mortgage.py Proyecto: ShikunW/UMStats504

# Keep records whose SALE_DATE is at least 20 * 365.25 (the plot labels
# below treat SALE_DATE / 365.25 as years after 1960), then restrict to the
# analysis columns and drop incomplete rows.
mort = mort.loc[mort.SALE_DATE >= 365.25 * 20]
mort = mort[[
    "log_MORTGAGE_AMOUNT", "log_SALE_AMOUNT", "SALE_DATE", "FIPS", "year"
]].dropna()

# Log mortgage amount modelled by interacting B-spline bases in log sale
# amount and sale date.
model1 = sm.OLS.from_formula(
    "log_MORTGAGE_AMOUNT ~ bs(log_SALE_AMOUNT, 8) * bs(SALE_DATE, 8)",
    data=mort)
result1 = model1.fit()

# One fitted curve per sale year 1985, 1990, ..., 2015, each traced against
# log sale amount.
plt.clf()
ax = plt.axes([0.1, 0.12, 0.75, 0.8])
for k in range(7):
    pred, cb, fvals = predict_functional(
        result1,
        "log_SALE_AMOUNT",
        values={"SALE_DATE": 365.25 * (25 + 5 * k)})
    plt.plot(fvals, pred, '-', label="%4d" % (1960 + 25 + 5 * k))
ha, lb = ax.get_legend_handles_labels()
# `loc` is passed by keyword: Matplotlib 3.6 deprecated the positional form
# and later releases reject it.
leg = plt.figlegend(ha, lb, loc="center right")
leg.draw_frame(False)
plt.xlabel("log2 Sale Amount", size=15)
plt.ylabel("log2 Mortgage Amount", size=15)
plt.grid(True)
# `pdf` is created outside this fragment (presumably a PdfPages -- confirm).
pdf.savefig()

plt.clf()
ax = plt.axes([0.12, 0.12, 0.75, 0.8])
for k in [16, 17, 18, 19]:
    pred, cb, fvals = predict_functional(result1,
                                         "SALE_DATE",

Ejemplo n.º 11

0

Mostrar archivo

Archivo: housing_price_index.py Proyecto: lefft/boosh

# Residuals-versus-fitted scatter for result1 (the figure itself is set up
# before this fragment).
plt.grid(True)
plt.plot(result1.fittedvalues, result1.resid, 'o', alpha=0.5)
plt.xlabel("Fitted values", size=15)
plt.ylabel("Residuals", size=15)


# __Plots of the fitted conditional mean function__
# 
# 

# It's often useful to plot the fitted relationship between variables in a regression model.  To do this in a multiple regression, we usually hold all but one of the covariates (independent variables) fixed at a particular value, often at the mean value.  Here is how to make such a plot with the multiple regression model fit above.  We hold the population growth rate and real cost of borrowing fixed at their mean values and display the fitted log housing price index in terms of log household income.

# In[24]:

# Trace the fit against 'lpci_real', with 'pgr' and 'rcb' each reduced to
# their mean.  pr, cb, xv are the predictions, the band (two columns used as
# its edges below) and the grid of focus-variable values.
pr, cb, xv = predict_functional(result1, 'lpci_real', summaries={'pgr' : np.mean, 'rcb' : np.mean})

# Fitted curve with its confidence band shaded behind it.
plt.clf()
plt.grid(True)
plt.plot(xv, pr, '-', color='orange', lw=4)
plt.fill_between(xv, cb[:, 0], cb[:, 1], color='grey')
plt.xlabel("Log disposable income", size=15)
plt.ylabel("Log house price index", size=15)


# To see how two predictors work together in the regression model, we can plot one of them along the horizontal axis, and plot different lines for different fixed values of the other covariate.  We do this below with the population growth rate and disposable income variables.

# In[25]:

yp = []
for k in range(3):

Ejemplo n.º 12

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: cong1989/statsmodels

    def test_scb(self):
        """Plot pointwise and simultaneous confidence bands for three GLMs.

        For the Poisson, binomial and Gaussian families, a response is
        simulated independently of the covariates, so the population mean
        (and linear predictor) is constant.  A GLM is fitted to an ndarray
        design matrix and, for ``linear`` False and then True, the fit is
        traced against the focus variable "x1" twice: with the default band
        and with ``ci_method='simultaneous'``.  Both bands are drawn together
        with the estimate and the constant truth.  No numeric assertions are
        made; each figure is passed to ``self.close_or_save``.
        """

        np.random.seed(473)
        n = 100
        x = np.random.normal(size=(n,4))
        # Overwrite the first column with ones so it acts as an intercept.
        x[:, 0] = 1

        for fam_name in "poisson", "binomial", "gaussian":

            # true_mean / true_lp: the constant population mean and its
            # value on the linear-predictor scale.
            if fam_name == "poisson":
                y = np.random.poisson(20, size=n)
                fam = sm.families.Poisson()
                true_mean = 20
                true_lp = np.log(20)
            elif fam_name == "binomial":
                y = 1 * (np.random.uniform(size=n) < 0.5)
                fam = sm.families.Binomial()
                true_mean = 0.5
                true_lp = 0
            elif fam_name == "gaussian":
                y = np.random.normal(size=n)
                fam = sm.families.Gaussian()
                true_mean = 0
                true_lp = 0

            model = sm.GLM(y, x, family=fam)
            result = model.fit()

            # CB is for linear predictor or mean response
            for linear in False, True:

                true = true_lp if linear else true_mean

                # Column names 'const', 'x1', 'x2', 'x3' are presumably the
                # defaults statsmodels gives the array columns -- confirm.
                values = {'const': 1, "x2": 0}
                summaries = {"x3": np.mean}
                pred1, cb1, fvals1 = predict_functional(result, "x1",
                            values=values, summaries=summaries, linear=linear)
                pred2, cb2, fvals2 = predict_functional(result, "x1",
                            values=values, summaries=summaries,
                            ci_method='simultaneous', linear=linear)

                plt.clf()
                fig = plt.figure()
                ax = plt.axes([0.1, 0.1, 0.58, 0.8])
                plt.plot(fvals1, pred1, '-', color='black', label='Estimate')
                plt.plot(fvals1, true * np.ones(len(pred1)), '-', color='purple',
                         label='Truth')
                # Only one edge of each band is labelled so that the legend
                # has a single entry per band.
                plt.plot(fvals1, cb1[:, 0], color='blue', label='Pointwise CB')
                plt.plot(fvals1, cb1[:, 1], color='blue')
                plt.plot(fvals2, cb2[:, 0], color='green', label='Simultaneous CB')
                plt.plot(fvals2, cb2[:, 1], color='green')
                ha, lb = ax.get_legend_handles_labels()
                # Keyword `loc`: the positional form was deprecated in
                # Matplotlib 3.6 and is rejected by later releases.
                leg = plt.figlegend(ha, lb, loc="center right")
                leg.draw_frame(False)
                plt.xlabel("Focus variable", size=15)
                if linear:
                    plt.ylabel("Linear predictor", size=15)
                else:
                    plt.ylabel("Fitted mean", size=15)
                plt.title("%s family prediction" % fam_name.capitalize())

                self.close_or_save(fig)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: year_built_tax.py Proyecto: zubada/UMStats504

                             groups="fips")
# Fit model7 (constructed above this fragment), capping the fit at 5
# iterations.
result7 = model7.fit(maxiter=5)

# Plot the estimated autocorrelations: one figure per fitted model, drawn
# from the dependence parameters of its covariance structure.
for result in result3, result6:
    plt.clf()
    plt.plot(result.cov_struct.dep_params)
    plt.grid(True)
    # Ticks are placed at 0..17, but the visible range is limited to 0-15.
    plt.gca().set_xticks(range(18))
    plt.xlim(0, 15)
    plt.xlabel("Lag (years)", size=15)
    plt.ylabel("Autocorrelation", size=15)
    # `pdf` is created outside this fragment (presumably a PdfPages --
    # confirm).
    pdf.savefig()

from statsmodels.sandbox import predict_functional
# Predictions against "year" with simultaneous confidence bands, holding
# logpop at 10, 11 and 12 in turn; each call yields (pred, cb, fvals).
(pred1, cb1, fvals1), (pred2, cb2, fvals2), (pred3, cb3, fvals3) = [
    predict_functional.predict_functional(
        result5, "year", ci_method="simultaneous", values={"logpop": logpop})
    for logpop in (10, 11, 12)
]

for k in range(2):
    plt.clf()
    plt.axes([0.15, 0.1, 0.72, 0.86])
    if k == 0:
        # Plot fitted values on the log scale
        plt.plot(fvals1, pred1, '-', label="10")
        plt.plot(fvals2, pred2, '-', label="11")
        plt.plot(fvals3, pred3, '-', label="12")
        plt.fill_between(fvals1, cb1[:, 0], cb1[:, 1], color='grey')
        plt.fill_between(fvals2, cb2[:, 0], cb2[:, 1], color='grey')

Ejemplo n.º 14

0

Mostrar archivo

# This type of plot requires us to fix the values of all variables other
# than the dependent variable (SBP here) and one independent variable
# that we call the *focus variable* (which is age here).  Below we fix
# the gender as "female" and the BMI as 25.  Thus, the graphs below show
# the relationship between expected SBP and age for women with BMI equal
# to 25.

from statsmodels.sandbox.predict_functional import predict_functional

# Fix certain variables at reference values.  Not all of these
# variables are used here, but we provide them with a value anyway
# to prevent a warning message from appearing.
values = {"RIAGENDRx": "Female", "RIAGENDR": 2, "BMXBMI": 25,
          "DMDEDUC2": 1, "RIDRETH1": 1, "SMQ020": 1}

pr, cb, fv = predict_functional(result, "RIDAGEYR",
                values=values, ci_method="simultaneous")

# x and y are passed by keyword: seaborn 0.12 and later no longer accept
# them positionally.
ax = sns.lineplot(x=fv, y=pr, lw=4)
# Shade the simultaneous confidence band around the fitted curve.
ax.fill_between(fv, cb[:, 0], cb[:, 1], color='grey', alpha=0.4)
ax.set_xlabel("Age")
_ = ax.set_ylabel("SBP")

# The analogous plot for BMI is shown next.  Here we fix the gender as
# "female" and the age at 50, so we are looking at the relationship
# between expected SBP and BMI for women of age 50.

# BMI becomes the focus variable, so it must no longer be held fixed;
# age is fixed in its place.
del values["BMXBMI"]
values["RIDAGEYR"] = 50
pr, cb, fv = predict_functional(result, "BMXBMI",
                values=values, ci_method="simultaneous")

Ejemplo n.º 15

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: kasunsp/pinalpha_mvp

    def test_glm_formula(self):
        """Plot binomial GLM predictions against the data-generating curve.

        Binary responses are drawn with log odds
        ``-1 + 0.5 * x1**2 + (x3 == "B")`` and fitted with a binomial GLM
        via a formula.  For ``linear`` False and then True, the fit is traced
        against the focus variable ``x1`` with ``x3`` held at "B" and at "C".
        Each pass draws two figures: fitted versus exact curves, and fitted
        curves with confidence bands.  The test has no numeric assertions;
        each figure goes to ``self.close_or_save``.
        """

        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        # Categorical covariate with levels "A", "B", "C".
        x3 = np.random.randint(0, 3, size=n)
        x3 = np.asarray(["ABC"[i] for i in x3])
        lin_pred = -1 + 0.5 * x1**2 + (x3 == "B")
        prob = 1 / (1 + np.exp(-lin_pred))
        y = 1 * (np.random.uniform(size=n) < prob)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + I(x1**2) + x2 + C(x3)"
        model = sm.GLM.from_formula(fml,
                                    family=sm.families.Binomial(),
                                    data=df)
        result = model.fit()
        summaries = {"x2": np.mean}

        for linear in False, True:

            values = {"x3": "B"}
            pr1, ci1, fvals1 = predict_functional(result,
                                                  "x1",
                                                  summaries,
                                                  values,
                                                  linear=linear)

            values = {"x3": "C"}
            pr2, ci2, fvals2 = predict_functional(result,
                                                  "x1",
                                                  summaries,
                                                  values,
                                                  linear=linear)

            # Population log odds on each grid; the trailing "+ 1" is the
            # x3 == "B" effect.
            exact1 = -1 + 0.5 * fvals1**2 + 1
            exact2 = -1 + 0.5 * fvals2**2

            if not linear:
                # Map the log odds to probabilities for this pass.
                exact1 = 1 / (1 + np.exp(-exact1))
                exact2 = 1 / (1 + np.exp(-exact2))

            # Figure 1: fitted versus exact curves.
            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B')
            plt.plot(fvals2, pr2, '-', label='x3=C')
            plt.plot(fvals1, exact1, '-', label='x3=B (exact)')
            plt.plot(fvals2, exact2, '-', label='x3=C (exact)')
            ha, lb = ax.get_legend_handles_labels()
            # Pass `loc` by keyword; Matplotlib 3.6 deprecated passing it
            # positionally and later releases reject it.
            plt.figlegend(ha, lb, loc="center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

            # Figure 2: fitted curves with their confidence bands.
            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B', color='orange')
            plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
            plt.plot(fvals2, pr2, '-', label='x3=C', color='lime')
            plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
            ha, lb = ax.get_legend_handles_labels()
            plt.figlegend(ha, lb, loc="center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: salesprice_lm.py Proyecto: ShikunW/UMStats504

# Keep records whose SALE_DATE is at least 20 * 365.25, and derive the
# building's age at sale from the sale year and the year built.
df = df.loc[df.SALE_DATE >= 365.25 * 20]
df["age"] = df.year - df.YEAR_BUILT

# Log sale amount modelled with B-spline bases in year, age, land area and
# living area, with year interacting with each of the other three.
fml = "log_SALE_AMOUNT ~ bs(year, 6) * bs(age, 6) + bs(year, 6) * (bs(LAND_SQUARE_FOOTAGE, 6) + bs(LIVING_SQUARE_FEET, 6) + bs(age, 6))"
model = sm.OLS.from_formula(fml, df)
result = model.fit()

pdf = PdfPages("salesprice_lm.pdf")

# One fitted curve per building age, traced against year of sale with the
# two area variables reduced to their medians.
plt.clf()
for age in 0, 10, 20, 40:
    pred, cb, fvals = predict_functional(result,
                                         "year",
                                         values={"age": age},
                                         summaries={
                                             "LAND_SQUARE_FOOTAGE": np.median,
                                             "LIVING_SQUARE_FEET": np.median
                                         })
    plt.plot(fvals, pred, '-', label=str(age))

plt.grid(True)
ha, lb = plt.gca().get_legend_handles_labels()
# `loc` is passed by keyword: Matplotlib 3.6 deprecated the positional form
# and later releases reject it.
leg = plt.figlegend(ha, lb, loc="center right")
leg.draw_frame(False)
plt.ylabel("Sales price (log2)", size=15)
plt.xlabel("Year of sale", size=15)

pdf.savefig()

pdf.close()

Ejemplo n.º 17

0

Mostrar archivo

Archivo: nhanes_glm.py Proyecto: willtarn/workshops

# +
from statsmodels.sandbox.predict_functional import predict_functional

# Reference values for every variable other than the focus variable.
# NOTE(review): RIAGENDR=1 next to RIAGENDRx="Female" looks inconsistent if
# 1 codes male -- confirm against the codebook.  It is harmless if the
# fitted model only uses RIAGENDRx.
values = {
    "RIAGENDRx": "Female",
    "RIAGENDR": 1,
    "BMXBMI": 25,
    "DMDEDUC2": 1,
    "RIDRETH1": 1,
    "SMQ020": 1,
    "DMDEDUC2x": "College",
    "BPXSY1": 120
}

# Trace the fit against age with a simultaneous confidence band.
pr, cb, fv = predict_functional(result,
                                "RIDAGEYR",
                                values=values,
                                ci_method="simultaneous")

# x and y are passed by keyword: seaborn 0.12 and later no longer accept
# them positionally.
ax = sns.lineplot(x=fv, y=pr, lw=4)
ax.fill_between(fv, cb[:, 0], cb[:, 1], color='grey', alpha=0.4)
ax.set_xlabel("Age")
_ = ax.set_ylabel("Smoking")
# -

# We can display the same plot in terms of probabilities instead of in
# terms of log odds.  The probability can be obtained from the log odds
# using the relationship `p = 1 / (1 + exp(-o))` where `o` is the log
# odds.  Note that while the age and log odds are linearly related, age
# has a curved relationship with probability.  This is necessary because
# probabilities must remain between 0 and 1, whereas a linear relationship
# would eventually leave that range.

Ejemplo n.º 18

0

Mostrar archivo

Archivo: test_predict_functional.py Proyecto: kasunsp/pinalpha_mvp

    def test_scb(self):
        """Plot pointwise and simultaneous confidence bands for three GLMs.

        For each of the Poisson, binomial and Gaussian families the response
        is drawn independently of the covariates, so the population mean and
        linear predictor are constants.  A GLM is fitted to an ndarray
        design matrix; then, for ``linear`` False and True, predictions
        against the focus variable "x1" are computed with the default band
        and with ``ci_method='simultaneous'`` and plotted together with the
        constant truth.  The test has no numeric assertions; each figure
        goes to ``self.close_or_save``.
        """

        np.random.seed(473)
        n = 100
        x = np.random.normal(size=(n, 4))
        # Overwrite the first column with ones so it acts as an intercept.
        x[:, 0] = 1

        for fam_name in "poisson", "binomial", "gaussian":

            # true_mean / true_lp: the constant population mean and its
            # value on the linear-predictor scale.
            if fam_name == "poisson":
                y = np.random.poisson(20, size=n)
                fam = sm.families.Poisson()
                true_mean = 20
                true_lp = np.log(20)
            elif fam_name == "binomial":
                y = 1 * (np.random.uniform(size=n) < 0.5)
                fam = sm.families.Binomial()
                true_mean = 0.5
                true_lp = 0
            elif fam_name == "gaussian":
                y = np.random.normal(size=n)
                fam = sm.families.Gaussian()
                true_mean = 0
                true_lp = 0

            model = sm.GLM(y, x, family=fam)
            result = model.fit()

            # CB is for linear predictor or mean response
            for linear in False, True:

                true = true_lp if linear else true_mean

                # Column names 'const', 'x1', 'x2', 'x3' are presumably the
                # defaults statsmodels gives the array columns -- confirm.
                values = {'const': 1, "x2": 0}
                summaries = {"x3": np.mean}
                pred1, cb1, fvals1 = predict_functional(result,
                                                        "x1",
                                                        values=values,
                                                        summaries=summaries,
                                                        linear=linear)
                pred2, cb2, fvals2 = predict_functional(
                    result,
                    "x1",
                    values=values,
                    summaries=summaries,
                    ci_method='simultaneous',
                    linear=linear)

                plt.clf()
                fig = plt.figure()
                ax = plt.axes([0.1, 0.1, 0.58, 0.8])
                plt.plot(fvals1, pred1, '-', color='black', label='Estimate')
                plt.plot(fvals1,
                         true * np.ones(len(pred1)),
                         '-',
                         color='purple',
                         label='Truth')
                # Only one edge of each band is labelled so that the legend
                # has a single entry per band.
                plt.plot(fvals1, cb1[:, 0], color='blue', label='Pointwise CB')
                plt.plot(fvals1, cb1[:, 1], color='blue')
                plt.plot(fvals2,
                         cb2[:, 0],
                         color='green',
                         label='Simultaneous CB')
                plt.plot(fvals2, cb2[:, 1], color='green')
                ha, lb = ax.get_legend_handles_labels()
                # Pass `loc` by keyword; Matplotlib 3.6 deprecated passing
                # it positionally and later releases reject it.
                leg = plt.figlegend(ha, lb, loc="center right")
                leg.draw_frame(False)
                plt.xlabel("Focus variable", size=15)
                if linear:
                    plt.ylabel("Linear predictor", size=15)
                else:
                    plt.ylabel("Fitted mean", size=15)
                plt.title("%s family prediction" % fam_name.capitalize())

                self.close_or_save(fig)

Ejemplo n.º 19

0

Mostrar archivo

# Written with help from:
# https://towardsdatascience.com/logistic-regression-model-fitting-and-finding-the-correlation-p-value-z-score-confidence-8330fb86db19

# In[92]:

from statsmodels.sandbox.predict_functional import predict_functional

# In[104]:

# Reference values at which the non-focus covariates are held fixed.
values = {"hist": 0, "tumorsize": 50, "accinsitu": 0, "lymphinv": 0}

# In[105]:

# Trace the fit against age with a simultaneous confidence band.
pr, cb, fv = predict_functional(result,
                                "age",
                                values=values,
                                ci_method="simultaneous")

# In[106]:

# x and y are passed by keyword: seaborn 0.12 and later no longer accept
# them positionally.
ax = sns.lineplot(x=fv, y=pr, lw=4)
ax.fill_between(fv, cb[:, 0], cb[:, 1], color='grey', alpha=0.4)
ax.set_xlabel("age")
ax.set_ylabel("Re-excision")

ax.set_title('Fitted Model: Log-odd probability of Age by Re-excision')

# This plot of fitted log-odds visualizes the effect of age on re-excision
# for hist=0, tumorsize=50, accinsitu=0 and lymphinv=0 under the fitted GLM.
# A slight negative association between age and re-excision is visible in
# this plot for the covariate values described above.