def test_zeroinflatedpoisson(self):
    """Check prior-predictive sample shapes for a zero-inflated Poisson model.

    The two scalar priors must produce one value per draw, and the
    ``suppliers`` variable (declared with ``shape=20``) must produce a
    ``(draws, 20)`` array.
    """
    n_draws = 5000
    n_suppliers = 20

    with pm.Model():
        theta_prior = pm.Beta("theta", alpha=1, beta=1)
        psi_prior = pm.HalfNormal("psi", sd=1)
        pm.ZeroInflatedPoisson(
            "suppliers",
            psi=psi_prior,
            theta=theta_prior,
            shape=n_suppliers,
        )
        prior_draws = pm.sample_prior_predictive(samples=n_draws)

    assert prior_draws["theta"].shape == (n_draws,)
    assert prior_draws["psi"].shape == (n_draws,)
    assert prior_draws["suppliers"].shape == (n_draws, n_suppliers)
def test_zeroinflatedpoisson(self):
    """Verify the shapes returned by ``sample_prior_predictive``.

    Builds a model with a Beta prior named ``theta``, a HalfNormal prior
    named ``psi`` and a length-20 ZeroInflatedPoisson named ``suppliers``,
    then checks every sampled array against its expected shape.
    """
    draws = 5000

    with pm.Model():
        theta_rv = pm.Beta('theta', alpha=1, beta=1)
        psi_rv = pm.HalfNormal('psi', sd=1)
        pm.ZeroInflatedPoisson('suppliers', psi=psi_rv, theta=theta_rv, shape=20)
        gen_data = pm.sample_prior_predictive(samples=draws)

    # (variable name, expected shape) pairs, checked in declaration order.
    expected_shapes = (
        ('theta', (draws,)),
        ('psi', (draws,)),
        ('suppliers', (draws, 20)),
    )
    for var_name, expected in expected_shapes:
        assert gen_data[var_name].shape == expected
def build_model(self):
    """Build the zero-inflated Poisson model for the counts in ``self.y``.

    All distribution parameters are passed by keyword. The positional order
    of ``ZeroInflatedPoisson`` (``psi``/``theta``) and the meaning of the
    second positional argument of ``Bernoulli`` differ between PyMC3
    releases, so positional calls can silently bind the wrong parameter.

    Returns:
        The ``pm.Model`` containing ``psi``, ``z``, ``theta`` and the
        observed variable ``y``.
    """
    with pm.Model() as model:
        # Estimated occupancy
        psi = pm.Beta('psi', alpha=1, beta=1)
        # Latent variable for occupancy, one entry per element of self.y
        pm.Bernoulli('z', p=psi, shape=self.y.shape)
        # Estimated mean count
        theta = pm.Uniform('theta', lower=0, upper=100)
        # Poisson likelihood: psi is the mixture weight, theta the Poisson mean
        pm.ZeroInflatedPoisson('y', psi=psi, theta=theta, observed=self.y)
    return model
def build_model(self):
    """Assemble and return the occupancy model observed on ``self.y``.

    The model holds a Beta(1, 1) prior ``psi``, a Bernoulli variable ``z``
    shaped like ``self.y``, a Uniform(0, 100) prior ``theta`` and a
    ZeroInflatedPoisson likelihood ``y``.
    """
    occupancy_model = pm.Model()
    with occupancy_model:
        # Estimated occupancy
        occupancy = pm.Beta("psi", alpha=1, beta=1)

        # Latent variable for occupancy
        pm.Bernoulli("z", occupancy, shape=self.y.shape)

        # Estimated mean count
        mean_count = pm.Uniform("theta", lower=0, upper=100)

        # Poisson likelihood
        pm.ZeroInflatedPoisson(
            "y", psi=occupancy, theta=mean_count, observed=self.y
        )
    return occupancy_model
def get_model(dist, data) -> pm.Model:
    """Return a PyMC3 count model of the requested family, observed on ``data``.

    Supported ``dist`` values are ``"Poisson"``, ``"ZeroInflatedPoisson"``,
    ``"NegativeBinomial"`` and ``"ZeroInflatedNegativeBinomial"``. Any other
    value falls through and the function returns ``None``.

    ``data.mean(0)`` is used as the ``lam`` parameter of the Exponential
    prior, and every prior has shape ``(1, data.shape[1])``.
    # NOTE(review): presumably one column per experiment - confirm with caller.
    """
    means = data.mean(0)
    n_exp = data.shape[1]
    prior_shape = (1, n_exp)

    if dist == "Poisson":
        with pm.Model() as model:
            rate = pm.Exponential("lam", lam=means, shape=prior_shape)
            pm.Poisson("poi", mu=rate, observed=data)
        return model

    if dist == "ZeroInflatedPoisson":
        with pm.Model() as model:
            mix = pm.Uniform("psi", shape=prior_shape)
            rate = pm.Exponential("lam", lam=means, shape=prior_shape)
            pm.ZeroInflatedPoisson("zip", psi=mix, theta=rate, observed=data)
        return model

    if dist == "NegativeBinomial":
        with pm.Model() as model:
            dispersion = pm.Gamma("gm", alpha=0.01, beta=0.01, shape=prior_shape)
            rate = pm.Exponential("lam", lam=means, shape=prior_shape)
            pm.NegativeBinomial("nb", alpha=dispersion, mu=rate, observed=data)
        return model

    if dist == "ZeroInflatedNegativeBinomial":
        with pm.Model() as model:
            dispersion = pm.Gamma("gm", alpha=0.01, beta=0.01, shape=prior_shape)
            rate = pm.Exponential("lam", lam=means, shape=prior_shape)
            mix = pm.Uniform("psi", shape=prior_shape)
            pm.ZeroInflatedNegativeBinomial(
                "zinb", psi=mix, alpha=dispersion, mu=rate, observed=data
            )
        return model

    # Unknown family: keep the original implicit-None result.
    return None
def run_model(month=7,
              n_samples=1000,
              interp_type='ncs',
              binary=True,
              spike=0.9,
              hdi_prob=0.95,
              zero_inf=0.7):
    """Fit a zero-inflated Poisson regression of COVID-19 deaths and save diagnostics.

    Reads ``../data/<interp_type>-pop-deaths-and-<binary|nonbinary>-mandates.csv``,
    keeps the rows whose ``Month`` equals ``month``, samples the model, and
    writes posterior summaries (CSV), ridge plots, ESS plots and a posterior
    predictive plot under ``../images/``.

    Args:
        month: Value matched against the ``Month`` column.
        n_samples: Number of posterior draws; also used as the number of
            tuning steps.
        interp_type: Tag used in the input and output file names.
        binary: Selects ``'binary'`` or ``'nonbinary'`` in the file names.
        spike: Success probability of the Bernoulli inclusion indicators ``xi``.
        hdi_prob: HDI probability used for the CSV summaries.
        zero_inf: Fixed ``psi`` of the ZeroInflatedPoisson likelihood.

    Returns:
        The trace returned by ``pm.sample``, so that the caller can work with
        the posterior data directly.
    """
    # preprocessing
    binary_str = 'binary' if binary else 'nonbinary'
    run_tag = interp_type + '_' + binary_str
    df = pd.read_csv('../data/' + interp_type + '-pop-deaths-and-' +
                     binary_str + '-mandates.csv',
                     index_col=0)
    df = df.rename(columns={
        "Age Group": "Age_Group",
        "COVID-19 Deaths": "covid_19_deaths"
    })
    test_df = df[df["Month"] == month]
    # takes all of the 4 mandate columns that currently exist
    mandates = test_df.iloc[:, -4:]
    covid_deaths = test_df["covid_19_deaths"]
    # makes the population in units of millions
    population = test_df["Population"] / 1000000
    age_data = pd.get_dummies(test_df["Age_Group"]).drop("Under 1 year",
                                                         axis=1)
    sex_data = pd.get_dummies(test_df["Sex"], drop_first=True)

    # Sizes are derived from the design matrices rather than hard-coded, so
    # the priors always agree with the data actually loaded.
    n_mandates = len(mandates.columns)
    n_age = len(age_data.columns)

    # run the model
    with pm.Model() as model:
        # spike and slab prior
        tau = pm.InverseGamma('tau', alpha=20, beta=20)
        xi = pm.Bernoulli('xi', p=spike, shape=n_mandates)
        beta_mandates = pm.MvNormal('beta_mandate',
                                    mu=0,
                                    cov=tau * np.eye(n_mandates),
                                    shape=n_mandates)

        # age prior
        mu_age_mean = np.linspace(-5, 5, n_age)
        age_scale = pm.HalfNormal('cov', sigma=2)
        mu_age = pm.MvNormal('mu_age',
                             mu=mu_age_mean,
                             cov=np.identity(n_age),
                             shape=(1, n_age))
        beta_age = pm.MvNormal('beta_age',
                               mu=mu_age,
                               cov=(age_scale**2) * np.identity(n_age),
                               shape=(1, n_age))

        # sex prior
        # NOTE(review): 'simga_sex' / 'simga_intercept' look like typos of
        # 'sigma_*'; kept as-is because they are the variable names stored
        # in the trace.
        mu_sex = pm.Normal('mu_sex', mu=0, sigma=1)
        sigma_sex = pm.HalfNormal('simga_sex', sigma=2)
        beta_sex = pm.Normal('beta_sex', mu=mu_sex, sigma=sigma_sex)

        # intercept prior
        mu_intercept = pm.Normal('mu_intercept', mu=0, sigma=1)
        sigma_intercept = pm.HalfNormal('simga_intercept', sigma=2)
        beta_intercept = pm.Normal('beta_intercept',
                                   mu=mu_intercept,
                                   sigma=sigma_intercept)

        # mean setup for likelihood
        # Separate names for the float arrays keep `mandates` a DataFrame, so
        # its column labels remain available for the reports below.
        mandate_values = np.array(mandates).astype(theano.config.floatX)
        population_values = np.array(population).astype(theano.config.floatX)
        sex_values = np.array(sex_data).astype(theano.config.floatX)
        age_values = np.array(age_data).astype(theano.config.floatX)
        w_mandates = theano.shared(mandate_values, 'w_mandate')
        w_sex = theano.shared(sex_values, 'w_sex')
        w_age = theano.shared(age_values, 'w_age')
        mean = beta_intercept + pm.math.matrix_dot(w_mandates, xi * beta_mandates) \
            + pm.math.matrix_dot(w_sex, beta_sex).T \
            + pm.math.matrix_dot(w_age, beta_age.T).T

        # likelihood
        pm.ZeroInflatedPoisson('y_obs',
                               psi=zero_inf,
                               theta=population_values * tt.exp(mean),
                               observed=covid_deaths)

        # sample from posterior
        trace = pm.sample(n_samples,
                          tune=n_samples,
                          nuts={'target_accept': 0.98})

    # posterior hdis: (variable, file prefix, row labels or None)
    summary_specs = (
        ("beta_mandate", "mandate", mandates.columns),
        ("beta_sex", "sex", sex_data.columns),
        ("beta_age", "age", age_data.columns),
        ("beta_intercept", "intercept", None),
    )
    for var_name, prefix, labels in summary_specs:
        summary = az.summary(trace, var_names=[var_name], hdi_prob=hdi_prob)
        if labels is not None:
            summary.index = labels
        summary.to_csv('../images/posteriors/' + prefix + '_' + run_tag +
                       '_summary.csv')

    # posterior distributions: (variable, file prefix, tick labels, title)
    ridge_specs = (
        ("beta_intercept", "intercept", None,
         r'Posterior Distribution of $\beta_0$'),
        ("beta_age", "age", age_data.columns,
         r'Posterior Distribution of $\beta_{age}$'),
        ("beta_sex", "sex", sex_data.columns,
         r'Posterior Distribution of $\beta_{sex}$'),
        ("beta_mandate", "mandate", mandates.columns,
         r'Posterior Distribution of $\beta_{mandate}$'),
    )
    for var_name, prefix, labels, title in ridge_specs:
        ax = az.plot_forest(trace,
                            'ridgeplot',
                            var_names=[var_name],
                            combined=True,
                            hdi_prob=0.99999)
        if labels is not None:
            ax[0].set_yticklabels(reversed(labels))
        ax[0].set_title(title)
        plt.savefig('../images/posteriors/' + prefix + '_posteriors_' +
                    run_tag + '.png')

    # ESS Plots
    ax = az.plot_ess(trace, var_names=["beta_intercept"])
    ax.set_title(r'$\beta_0$ ESS')
    plt.savefig('../images/ess/' + run_tag + '_interceptESS.png')

    # NOTE(review): the titles below are fixed labels for 10 age groups laid
    # out on a 3-column grid; confirm they match the order of age_data.columns.
    age_ess_labels = ('1-4', '15-24', '25-34', '35-44', '45-54', '5-14',
                      '55-64', '65-74', '75-84', '85+')
    ax = az.plot_ess(trace, var_names=["beta_age"])
    for position, label in enumerate(age_ess_labels):
        row, col = divmod(position, 3)
        ax[row, col].set_title(r'$\beta_{age[' + label + r']}$ ESS',
                               fontsize=18)
    plt.savefig('../images/ess/' + run_tag + '_ageESS.png')

    ax = az.plot_ess(trace, var_names=["beta_sex"])
    ax.set_title(r'$\beta_{sex}$ ESS')
    plt.savefig('../images/ess/' + run_tag + '_sexESS.png')

    # NOTE(review): fixed month labels for the 4 mandate columns - confirm.
    ax = az.plot_ess(trace, var_names=["beta_mandate"])
    for position, label in enumerate(('April', 'May', 'June', 'July')):
        ax[position].set_title(r'$\beta_{mandate[' + label + r']}$ ESS',
                               fontsize=18)
    plt.savefig('../images/ess/' + run_tag + '_mandateESS.png')

    # posterior predictive checking
    with model:
        ppc = pm.sample_posterior_predictive(trace, var_names=["y_obs"])
    az.plot_ppc(az.from_pymc3(posterior_predictive=ppc, model=model))
    plt.savefig('../images/posterior_predictive/' + run_tag + '.png')

    # return trace so that user can work with posterior data directly
    return trace
df = pd.read_csv(FISHFILE) # This dataset includes data collected from a survey of 250 visitors who # visited the park. The group level data consists of: # - The number of fish they caught (count) # - The number of children in the group (child) # - If they took a camper to the park (camper) with pm.Model() as ZIP_reg: psi = pm.Beta("psi", 1, 1) alpha = pm.Normal("alpha", 0, 10) beta = pm.Normal("beta", 0, 10, shape=2) lam = pm.math.exp(alpha + beta[0] * df["child"] + beta[1] * df["camper"]) y = pm.ZeroInflatedPoisson("y", theta=lam, psi=psi, observed=df["count"]) trace_ZIP_reg = pm.sample(2000) chain_ZIP_reg = trace_ZIP_reg[100:] pm.traceplot(chain_ZIP_reg) plt.savefig("fish_traceplot.png") plt.close() children = [0, 1, 2, 3, 4] fish_count_pred_0 = [] fish_count_pred_1 = [] thin = 5 for n in children: without_camper = chain_ZIP_reg.alpha[::
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

# Fit a zero-inflated Poisson model to simulated counts and show the trace.

np.random.seed(42)
n = 100
theta = 2.5  # Poisson rate
pi = 0.1  # probability of extra-zeros (pi = 1-psi)

# Simulate some data: each count is a Poisson draw, forced to zero whenever
# the uniform draw does not exceed pi.
counts = np.array([(np.random.random() > pi) * np.random.poisson(theta)
                   for _ in range(n)])

with pm.Model() as ZIP:
    psi = pm.Beta('p', alpha=1, beta=1)
    lam = pm.Gamma('lam', alpha=2, beta=0.1)
    # Keyword arguments: the positional order of psi/theta is not the same in
    # every PyMC3 release, so a positional call can swap the two parameters.
    y = pm.ZeroInflatedPoisson('y', psi=psi, theta=lam, observed=counts)
    trace = pm.sample(5000)

# Drop the first 100 draws before plotting.
pm.traceplot(trace[100:])
plt.show()
n = 100
theta_real = 2.5
psi = 0.1

# Simulate some data
# Each draw is a Poisson(theta_real) count that is kept only when the uniform
# draw exceeds 1 - psi; the two random calls happen in that order per sample.
samples = []
for _ in range(n):
    keep = np.random.random() > (1 - psi)
    samples.append(keep * np.random.poisson(theta_real))
counts = np.array(samples)


# In[33]:


with pm.Model() as ZIP:
    psi = pm.Beta('psi', alpha=1, beta=1)
    theta = pm.Gamma('theta', alpha=2, beta=0.1)
    y = pm.ZeroInflatedPoisson('y', psi=psi, theta=theta, observed=counts)
    trace = pm.sample(1000)


# In[34]:


az.plot_trace(trace)
plt.savefig('B11197_04_11.png', dpi=300)


# In[35]:


#az.summary(trace)


# ## Poisson regression and ZIP regression

# In[36]:
plt.figure()
np.random.seed(42)
n = 100
theta = 2.5
pi = 0.1
# Simulated counts: a Poisson(theta) draw, zeroed whenever the uniform draw
# does not exceed pi.
counts = np.array([(np.random.random() > pi) * np.random.poisson(theta)
                   for _ in range(n)])

with pm.Model() as ZIP:
    psi = pm.Beta('psi', alpha=1, beta=1)
    lam = pm.Gamma('lam', alpha=2, beta=0.1)
    # Keywords make the psi/theta binding independent of positional order.
    y = pm.ZeroInflatedPoisson('y', psi=psi, theta=lam, observed=counts)
    # NOTE(review): `njobs` was renamed `cores` in later PyMC3 releases;
    # switch once the minimum supported version is confirmed.
    trace_ZIP = pm.sample(5000, njobs=1)

# Discard the first 100 draws before plotting.
chain_ZIP = trace_ZIP[100:]
pm.traceplot(chain_ZIP)
# `figsize` is not a savefig option (figure size belongs to the figure), so
# it is no longer passed here.
plt.savefig('img708.png', dpi=300)

plt.figure()
#https://stats.idre.ucla.edu/stat/data/fish.csv
fish_data = pd.read_csv('fish.csv')
fish_data.head()

with pm.Model() as ZIP_reg:
    psi = pm.Beta('psi', 1, 1)
plt.bar(
    0.0,
    drink_zeros,
    width=1.0,
    bottom=work_zeros,
    color="C1",
    alpha=0.5,
)
plt.xticks(bins + 0.5)
plt.xlabel("manuscripts completed")
plt.ylabel("Frequency")

# %%
# Zero-inflated Poisson model: `ap` and `al` are unconstrained parameters that
# are mapped through sigmoid / exp before entering the likelihood.
with pm.Model() as m11_4:
    ap = pm.Normal("ap", 0.0, 1.0)
    p = pm.math.sigmoid(ap)
    al = pm.Normal("al", 0.0, 10.0)
    lambda_ = pm.math.exp(al)
    y_obs = pm.ZeroInflatedPoisson(
        "y_obs",
        psi=1.0 - p,
        theta=lambda_,
        observed=y,
    )

# %%
with m11_4:
    map_11_4 = pm.find_MAP()

# %%
map_11_4

# %%
# MAP estimate of `ap` mapped through the logistic function
sp.special.expit(map_11_4["ap"])

# %%
# MAP estimate of `al` mapped through exp
np.exp(map_11_4["al"])

# %%
fishes = data["count"].values
children = data["child"].values
camper = data["camper"].values

# --------------------------- specify a probabilistic model ----------------------------------- #

with pm.Model() as zip_regression:
    # priors on the intercept and the two regression coefficients
    alpha = pm.Normal("alpha", mu=0, sd=10)
    beta = pm.Normal("beta", mu=0, sd=10, shape=2)

    # prior on the inflation coefficient
    psi = pm.Beta("psi", alpha=1, beta=1)

    # theta is the exponential of the linear predictor, tracked in the trace
    linear_predictor = alpha + beta[0] * children + beta[1] * camper
    theta = pm.Deterministic("theta", pm.math.exp(linear_predictor))

    # likelihood of the data
    y_obs = pm.ZeroInflatedPoisson("y_obs", psi=psi, theta=theta, observed=fishes)

    # inference step
    trace = pm.sample(1500)

# -------------------------- analyse the posterior --------------------------------------- #

with zip_regression:
    log.info(
        "The summary of the trace is as follows: %s",
        az.summary(trace, var_names=["alpha", "beta", "psi"]),
    )
    az.plot_trace(trace, var_names=["alpha", "beta", "psi"])

# -------------------------- plot --------------------------------------- #

plt.figure()

# initialize data to plot
children = [0, 1, 2, 3, 4]
fish_count_pred_0 = []
plt.show()

# -------------------- generate synthetic data --------------------- #

# number of draws
n = 1000

# true Poisson rate
theta_real = 2.5

# zero-inflating factor
psi_true = 0.5

# generate data from the ZIP model: a Poisson draw is kept only when the
# uniform draw exceeds 1 - psi_true
counts = np.array(
    [
        (np.random.random() > 1 - psi_true) * np.random.poisson(theta_real)
        for _ in range(n)
    ]
)

# --------------------- probabilistic method ---------------------- #

with pm.Model() as zip_model:
    # priors of the zero-inflated Poisson model
    psi = pm.Beta("psi", alpha=1, beta=1)
    theta = pm.Gamma("theta", alpha=2, beta=0.1)

    # likelihood of the data
    y = pm.ZeroInflatedPoisson("y", psi=psi, theta=theta, observed=counts)

    # inference step
    trace = pm.sample(1500)

# ---------------------- analyse the posterior -------------- #

with zip_model:
    az.plot_trace(trace)