Ejemplo n.º 1
0
def test_poisson_formula():
    """Compare the array and formula interfaces of PoissonBayesMixedGLM.

    Data come from gen_crossed_poisson.  For each fitting method (MAP
    first, then variational Bayes) one model is built directly from the
    design arrays and a second one from formulas over an equivalent data
    frame; the estimated parameters of the two must agree.
    """
    from collections import OrderedDict

    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)

    def level_labels(component):
        # Collapse the indicator columns of exog_vc that belong to one
        # variance component into a single vector of level labels.
        labels = np.zeros(len(y))
        columns = np.flatnonzero(ident == component)
        for level, col in enumerate(columns):
            labels[exog_vc[:, col] == 1] = level
        return labels

    for vb in (False, True):
        # Reference fit, built straight from the arrays.
        array_model = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident)
        array_fit = array_model.fit_vb() if vb else array_model.fit_map()

        # Categorical variables that match exog_vc.
        frame = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        frame["z1"] = level_labels(0)
        frame["z2"] = level_labels(1)

        # Ordered so the variance components keep the z1, z2 order.
        vc_fml = OrderedDict([("z1", "0 + C(z1)"), ("z2", "0 + C(z2)")])
        fml_model = PoissonBayesMixedGLM.from_formula(
            "y ~ 0 + x1", vc_fml, frame)
        fml_fit = fml_model.fit_vb() if vb else fml_model.fit_map()

        assert_allclose(array_fit.params, fml_fit.params, rtol=1e-5)
def test_doc_examples():
    """Smoke-test the usage examples shown in the class docstrings.

    Simulates village-level data with a seeded generator, then fits a
    binomial and a Poisson Bayesian mixed GLM with variational Bayes.
    Only successful execution is checked; the results are not inspected.
    """

    np.random.seed(8767)
    n = 200
    m = 20
    data = pd.DataFrame({"Year": np.random.uniform(0, 1, n),
                         "Village": np.random.randint(0, m, n)})
    data['year_cen'] = data['Year'] - data.Year.mean()

    # Binomial outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
    # Use the builtin int: the np.int alias was removed in NumPy 1.24.
    data["y"] = y.astype(int)

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = BinomialBayesMixedGLM.from_formula(
                 'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result

    # Poisson outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    data["y"] = np.random.poisson(np.exp(lpr))

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = PoissonBayesMixedGLM.from_formula(
                 'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result
Ejemplo n.º 3
0
def test_poisson_formula():
    """Compare the array and formula interfaces of PoissonBayesMixedGLM.

    Data come from gen_crossed_poisson.  For each fitting method (MAP
    first, then variational Bayes) the model is built once from the
    design arrays and once from formulas; the parameter estimates must
    agree, and cov_params() of both fits must have the expected shape.
    """
    from collections import OrderedDict

    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)

    def level_labels(component):
        # Collapse the indicator columns of exog_vc that belong to one
        # variance component into a single vector of level labels.
        labels = np.zeros(len(y))
        for level, col in enumerate(np.flatnonzero(ident == component)):
            labels[exog_vc[:, col] == 1] = level
        return labels

    for vb in (False, True):
        # Reference fit, built straight from the arrays.
        array_model = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident)
        array_fit = array_model.fit_vb() if vb else array_model.fit_map()

        # Categorical variables that match exog_vc.
        frame = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        frame["z1"] = level_labels(0)
        frame["z2"] = level_labels(1)

        # Ordered so the variance components keep the z1, z2 order.
        vc_fml = OrderedDict([("z1", "0 + C(z1)"), ("z2", "0 + C(z2)")])
        fml_model = PoissonBayesMixedGLM.from_formula(
            "y ~ 0 + x1", vc_fml, frame)
        fml_fit = fml_model.fit_vb() if vb else fml_model.fit_map()

        assert_allclose(array_fit.params, fml_fit.params, rtol=1e-5)

        for fit in (array_fit, fml_fit):
            cov = fit.cov_params()
            n_par = len(fit.params)
            if not vb:
                # MAP fit: square matrix; cholesky raises unless it is
                # positive definite.
                assert_equal(cov.shape, np.r_[n_par, n_par])
                np.linalg.cholesky(cov)
                continue
            # VB fit: one strictly positive entry per parameter.
            assert_equal(cov.shape, np.r_[n_par,])
            assert_equal(cov > 0, True * np.ones(n_par))
def test_poisson_formula():
    """Compare the array and formula interfaces of PoissonBayesMixedGLM.

    Data come from gen_crossed_poisson.  For each fitting method (MAP
    first, then variational Bayes) the model is built once from the
    design arrays and once from formulas; the parameter estimates must
    agree, and cov_params() of both fits must have the expected shape.
    """

    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)
    n_obs = len(y)

    for vb in (False, True):
        # Reference fit, built straight from the arrays.
        array_model = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident)
        fit_array = array_model.fit_vb if vb else array_model.fit_map
        array_fit = fit_array()

        # Categorical variables that match exog_vc: one label vector per
        # variance component, filled from that component's indicator
        # columns.
        frame = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        for component, name in enumerate(("z1", "z2")):
            labels = np.zeros(n_obs)
            columns = np.flatnonzero(ident == component)
            for level, col in enumerate(columns):
                labels[exog_vc[:, col] == 1] = level
            frame[name] = labels

        vc_fml = {"z1": "0 + C(z1)", "z2": "0 + C(z2)"}
        fml_model = PoissonBayesMixedGLM.from_formula(
            "y ~ 0 + x1", vc_fml, frame)
        fit_fml = fml_model.fit_vb if vb else fml_model.fit_map
        fml_fit = fit_fml()

        assert_allclose(array_fit.params, fml_fit.params, rtol=1e-5)

        for fit in (array_fit, fml_fit):
            cov = fit.cov_params()
            n_par = len(fit.params)
            if not vb:
                # MAP fit: square matrix; cholesky raises unless it is
                # positive definite.
                assert_equal(cov.shape, np.r_[n_par, n_par])
                np.linalg.cholesky(cov)
                continue
            # VB fit: one strictly positive entry per parameter.
            assert_equal(cov.shape, np.r_[n_par,])
            assert_equal(cov > 0, True * np.ones(n_par))
Ejemplo n.º 5
0
# Probability (Q-Q) plot of the Pearson residuals of the second GEE fit.
# NOTE(review): probplot draws through `plt`, i.e. onto the current axes,
# while the labels below are set on axs[1,0]; presumably axs[1,0] is the
# current axes at this point -- confirm.
stats.probplot(gee_model2_results.resid_pearson, plot=plt, fit=True)
axs[1,0].set_xlabel("Theoretical quantiles",fontsize=10)
axs[1,0].set_ylabel("Sample quantiles",fontsize=10)
axs[1,0].set_title("Q-Q plot of normalized residuals",fontsize=10)

plt.subplots_adjust(left=0.12, hspace=0.25)
plt.show()

# model comparison: print the qic() output of both GEE fits
print(gee_model2_results.qic())
print(gee_model1_results.qic())

# Both models improve, but are not satisfactory, probably because they
# cannot take account of excessive zeros and because they only use
# cluster-robust standard errors and thus cannot model how lower-level
# coefficients vary across groups of the higher level. Python statsmodels
# has zero-inflated count model methods, but they cannot deal with
# panel/clustered data.

# Approach two to generalized linear models for panel data: generalized
# linear mixed effects models, which support Poisson outcomes using
# Bayesian methods.
# Poisson mixed effects model with one random effect (state), fit by MAP
formula = "cases_count_pos ~ week_of_year + percent_age65over + percent_female + percent_black"
po_bay_panel1 = PoissonBayesMixedGLM.from_formula(formula, {'state': '0 + C(state)'}, US_cases_long_demogr_week)
po_bay_panel1_results = po_bay_panel1.fit_map()
print(po_bay_panel1_results.summary())

# Poisson mixed effects model with two independent random effects
# (state and week_of_year), fit by MAP. Note that week_of_year also
# appears as a fixed effect in the formula.
formula = "cases_count_pos ~ week_of_year + percent_age65over + percent_female + percent_black"
po_bay_panel2 = PoissonBayesMixedGLM.from_formula(formula, {'state': '0 + C(state)', "week_of_year": '0 + C(week_of_year)'}, US_cases_long_demogr_week)
po_bay_panel2_results = po_bay_panel2.fit_map()
print(po_bay_panel2_results.summary())

Ejemplo n.º 6
0
# Restrict the 2019 play-by-play data to rushing plays.
nfl_rush_2019 = nfl_2019[nfl_2019["play_type"] == "rush"]

# first, fit rush outcome models

# from_formula takes the random-effect formulas as a mapping from a
# variance-component name to a one-sided formula, not as a bare list.
# NOTE(review): without C(...), this assumes rusher_id and def_id are
# string-typed so that they are expanded into indicator columns -- confirm.
rush_vc_formulas = {"rusher_id": "0 + rusher_id", "def_id": "0 + def_id"}

# 1 - touchdown
# NOTE(review): the heading says "touchdown" but the outcome modelled here
# is `penalty`, the same outcome as model 4 further down, which also
# rebinds rush_penalty_mod; presumably a touchdown outcome was intended
# -- confirm.
rush_penalty_mod = BinomialBayesMixedGLM.from_formula(
    'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    rush_vc_formulas,
    data=nfl_rush_2019)

# fit_vb must be called; the bare attribute is only a bound method.
rush_penalty_result = rush_penalty_mod.fit_vb()

# 2 - rushing yards
rush_yard_mod = PoissonBayesMixedGLM.from_formula(
    'yards_gained ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    rush_vc_formulas,
    data=nfl_rush_2019)

rush_yard_result = rush_yard_mod.fit_vb()

# 3 - rushing turnovers (fumbles)
rush_turnover_mod = BinomialBayesMixedGLM.from_formula(
    'turnover ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    rush_vc_formulas,
    data=nfl_rush_2019)

rush_turnover_result = rush_turnover_mod.fit_vb()

# 4 - penalty (holding)
rush_penalty_mod = BinomialBayesMixedGLM.from_formula(
    'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',