def test_poisson_formula():
    """Check that array-based and formula-based specifications of the
    same crossed Poisson mixed GLM give matching parameter estimates,
    under both MAP and variational Bayes fitting.
    """
    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)

    for vb in False, True:
        glmm1 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident)
        if vb:
            rslt1 = glmm1.fit_vb()
        else:
            rslt1 = glmm1.fit_map()

        # Build categorical variables that match exog_vc: each variance
        # component k with ident == lab is an indicator column, so recover
        # the group label of each observation from the indicator.
        df = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        z1 = np.zeros(len(y))
        for j, k in enumerate(np.flatnonzero(ident == 0)):
            z1[exog_vc[:, k] == 1] = j
        df["z1"] = z1
        z2 = np.zeros(len(y))
        for j, k in enumerate(np.flatnonzero(ident == 1)):
            z2[exog_vc[:, k] == 1] = j
        df["z2"] = z2

        fml = "y ~ 0 + x1"
        # Plain dicts preserve insertion order on Python 3.7+, so the
        # in-function OrderedDict import is unnecessary.
        vc_fml = {"z1": "0 + C(z1)", "z2": "0 + C(z2)"}
        glmm2 = PoissonBayesMixedGLM.from_formula(fml, vc_fml, df)
        if vb:
            rslt2 = glmm2.fit_vb()
        else:
            rslt2 = glmm2.fit_map()

        assert_allclose(rslt1.params, rslt2.params, rtol=1e-5)
def test_doc_examples():
    """Smoke-test the binomial and Poisson mixed GLM examples so they
    stay in sync with the class docstrings.
    """
    np.random.seed(8767)
    n = 200
    m = 20
    data = pd.DataFrame({"Year": np.random.uniform(0, 1, n),
                         "Village": np.random.randint(0, m, n)})
    data['year_cen'] = data['Year'] - data.Year.mean()

    # Binomial outcome: random village intercepts and random village
    # slopes on centered year.
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
    # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
    data["y"] = y.astype(int)

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = BinomialBayesMixedGLM.from_formula(
        'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result

    # Poisson outcome with the same random-effects structure.
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    data["y"] = np.random.poisson(np.exp(lpr))

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = PoissonBayesMixedGLM.from_formula(
        'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result
def test_poisson_formula():
    """Check that array-based and formula-based specifications of the
    same crossed Poisson mixed GLM agree, and that cov_params has the
    expected shape/positivity under both MAP and VB fitting.
    """
    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)

    for vb in False, True:
        glmm1 = PoissonBayesMixedGLM(
            y, exog_fe, exog_vc, ident)
        if vb:
            rslt1 = glmm1.fit_vb()
        else:
            rslt1 = glmm1.fit_map()

        # Build categorical variables that match exog_vc: recover each
        # observation's group label from the indicator columns.
        df = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        z1 = np.zeros(len(y))
        for j, k in enumerate(np.flatnonzero(ident == 0)):
            z1[exog_vc[:, k] == 1] = j
        df["z1"] = z1
        z2 = np.zeros(len(y))
        for j, k in enumerate(np.flatnonzero(ident == 1)):
            z2[exog_vc[:, k] == 1] = j
        df["z2"] = z2

        fml = "y ~ 0 + x1"
        # Plain dicts preserve insertion order on Python 3.7+, so the
        # in-function OrderedDict import is unnecessary.
        vc_fml = {"z1": "0 + C(z1)", "z2": "0 + C(z2)"}
        glmm2 = PoissonBayesMixedGLM.from_formula(fml, vc_fml, df)
        if vb:
            rslt2 = glmm2.fit_vb()
        else:
            rslt2 = glmm2.fit_map()

        assert_allclose(rslt1.params, rslt2.params, rtol=1e-5)

        for rslt in rslt1, rslt2:
            cp = rslt.cov_params()
            p = len(rslt.params)
            if vb:
                # VB returns a vector of posterior variances.
                assert_equal(cp.shape, np.r_[p,])
                assert_equal(cp > 0, True*np.ones(p))
            else:
                # MAP returns a full covariance matrix, which must be
                # positive definite (Cholesky raises otherwise).
                assert_equal(cp.shape, np.r_[p, p])
                np.linalg.cholesky(cp)
def test_poisson_formula():
    """Fit the same crossed Poisson mixed GLM through the array interface
    and through formulas, then confirm the two parameterizations agree
    and that cov_params behaves as documented for MAP and VB fits.
    """
    y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 0.5)

    for use_vb in (False, True):
        model_arr = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident)
        fit_arr = model_arr.fit_vb() if use_vb else model_arr.fit_map()

        # Reconstruct the grouping factors encoded as indicator columns
        # of exog_vc, producing categorical columns in a dataframe.
        frame = pd.DataFrame({"y": y, "x1": exog_fe[:, 0]})
        for col_name, vc_label in (("z1", 0), ("z2", 1)):
            groups = np.zeros(len(y))
            for level, col in enumerate(np.flatnonzero(ident == vc_label)):
                groups[exog_vc[:, col] == 1] = level
            frame[col_name] = groups

        fml = "y ~ 0 + x1"
        vc_fml = {}
        vc_fml["z1"] = "0 + C(z1)"
        vc_fml["z2"] = "0 + C(z2)"
        model_fml = PoissonBayesMixedGLM.from_formula(fml, vc_fml, frame)
        fit_fml = model_fml.fit_vb() if use_vb else model_fml.fit_map()

        assert_allclose(fit_arr.params, fit_fml.params, rtol=1e-5)

        for fit in (fit_arr, fit_fml):
            cov = fit.cov_params()
            p = len(fit.params)
            if use_vb:
                # VB yields a vector of positive posterior variances.
                assert_equal(cov.shape, np.r_[p,])
                assert_equal(cov > 0, True*np.ones(p))
            else:
                # MAP yields a full p x p covariance matrix; Cholesky
                # succeeds only if it is positive definite.
                assert_equal(cov.shape, np.r_[p, p])
                np.linalg.cholesky(cov)
# Q-Q plot of the Pearson residuals from the second GEE model, drawn
# into the lower-left panel of a pre-existing axes grid `axs`.
# NOTE(review): `axs`, `plt`, `gee_model*_results` and
# `US_cases_long_demogr_week` are defined elsewhere in this script.
stats.probplot(gee_model2_results.resid_pearson, plot=plt, fit=True)
axs[1,0].set_xlabel("Theoretical quantiles",fontsize=10)
axs[1,0].set_ylabel("Sample quantiles",fontsize=10)
axs[1,0].set_title("Q-Q plot of normalized residuals",fontsize=10)
plt.subplots_adjust(left=0.12, hspace=0.25)
plt.show()

# model comparison via QIC (lower is better for GEE model selection)
print(gee_model2_results.qic())
print(gee_model1_results.qic())

# Both models improve, but are not satisfactory, probably because they cannot
# take account of excessive zeros and they only use cluster-robust standard
# errors and thus cannot model how lower-level coefficients vary across groups
# of the higher level. Python statsmodels has zero-inflated count model
# methods, but they cannot deal with panel/clustered data.

# Approach two to generalized linear models for panel data: Generalized
# Linear Mixed Effects Models support Poisson models using Bayesian methods.

# Poisson mixed effects model with one random effect (state intercepts).
formula = "cases_count_pos ~ week_of_year + percent_age65over + percent_female + percent_black"
po_bay_panel1 = PoissonBayesMixedGLM.from_formula(formula, {'state': '0 + C(state)'}, US_cases_long_demogr_week)
po_bay_panel1_results = po_bay_panel1.fit_map()
print(po_bay_panel1_results.summary())

# Poisson mixed effects model with two independent random effects
# (state intercepts and week-of-year intercepts).
formula = "cases_count_pos ~ week_of_year + percent_age65over + percent_female + percent_black"
po_bay_panel2 = PoissonBayesMixedGLM.from_formula(formula, {'state': '0 + C(state)', "week_of_year": '0 + C(week_of_year)'}, US_cases_long_demogr_week)
po_bay_panel2_results = po_bay_panel2.fit_map()
print(po_bay_panel2_results.summary())
nfl_rush_2019 = nfl_2019[nfl_2019["play_type"] == "rush"] # first, fit rush outcome models # 1 - touchdown rush_penalty_mod = BinomialBayesMixedGLM.from_formula( 'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap', ['0 + rusher_id', '0 + def_id'], data=nfl_rush_2019) rush_penalty_result = rush_penalty_mod.fit_vb # 2 - rushing yards rush_yard_mod = PoissonBayesMixedGLM.from_formula( 'yards_gained ~ shotgun + no_huddle + qb_dropback + run_location + run_gap', ['0 + rusher_id', '0 + def_id'], data=nfl_rush_2019) rush_yard_result = rush_yard_mod.fit_vb() # 3 - rushing turnovers (fumbles) rush_turnover_mod = BinomialBayesMixedGLM.from_formula( 'turnover ~ shotgun + no_huddle + qb_dropback + run_location + run_gap', ['0 + rusher_id', '0 + def_id'], data=nfl_rush_2019) rush_turnover_result = rush_turnover_mod.fit_vb() # 4 - penalty (holding) rush_penalty_mod = BinomialBayesMixedGLM.from_formula( 'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',