def test_glm_from_formula(self):
    with Model() as model:
        NAME = 'glm'
        GLM.from_formula('y ~ x', self.data_linear, name=NAME)
        start = find_MAP()
        step = Slice(model.vars)
        trace = sample(500, step=step, start=start, progressbar=False,
                       random_seed=self.random_seed)

        assert round(abs(np.mean(trace['%s_Intercept' % NAME]) - self.intercept), 1) == 0
        assert round(abs(np.mean(trace['%s_x' % NAME]) - self.slope), 1) == 0
        assert round(abs(np.mean(trace['%s_sd' % NAME]) - self.sd), 1) == 0
def test_glm_from_formula(self):
    with Model() as model:
        NAME = 'glm'
        GLM.from_formula('y ~ x', self.data_linear, name=NAME)
        start = find_MAP()
        step = Slice(model.vars)
        trace = sample(500, step=step, start=start, progressbar=False,
                       random_seed=self.random_seed)

        self.assertAlmostEqual(np.mean(trace['%s_Intercept' % NAME]), self.intercept, 1)
        self.assertAlmostEqual(np.mean(trace['%s_x' % NAME]), self.slope, 1)
        self.assertAlmostEqual(np.mean(trace['%s_sd' % NAME]), self.sd, 1)
def test_glm_from_formula(self): with Model() as model: NAME = "glm" GLM.from_formula("y ~ x", self.data_linear, name=NAME) start = find_MAP() step = Slice(model.vars) trace = sample( 500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed ) assert round(abs(np.mean(trace["%s_Intercept" % NAME]) - self.intercept), 1) == 0 assert round(abs(np.mean(trace["%s_x" % NAME]) - self.slope), 1) == 0 assert round(abs(np.mean(trace["%s_sd" % NAME]) - self.sigma), 1) == 0
# Imports assumed by the functions below (the project-specific helpers
# `load` and `make_levels` used in __main__ are defined/imported elsewhere).
from pymc3 import Model, Normal, HalfCauchy, sample, stats
from pymc3.glm import GLM


def pooled_model(X, y):
    '''
    This function builds a pooled model in PyMC3. It will only work with four
    independent variables in the X matrix.

    INPUT:  X - an np array containing a standardized variable matrix with four variables
            y - an np array containing the target values
    OUTPUT: pooled_model - a PyMC3 model object
            trace - a PyMC3 trace object
    '''
    data = dict(x1=X[:, 0], x2=X[:, 1], x3=X[:, 2], x4=X[:, 3], y=y)

    with Model() as pooled_model:
        # specify glm and pass in data. The resulting linear model, its likelihood
        # and all its parameters are automatically added to our model.
        GLM.from_formula('y ~ 1 + x1 + x2 + x3 + x4', data)

        # draw 1000 posterior samples using NUTS sampling (after 1000 tuning steps)
        trace = sample(1000, n_init=50000, tune=1000, njobs=1)

    return pooled_model, trace


def unpooled_model(X, y, level, n_levels):
    '''
    This function builds an unpooled model in PyMC3. It will only work with four
    independent variables in the X matrix.

    INPUT:  X - an np array containing a standardized variable matrix with four variables
            y - an np array containing the target values
            level - an array with the level value for each row of the matrix
            n_levels - an INT indicating the number of unique level names
    OUTPUT: unpooled_model - a PyMC3 model object
            unpooled_trace - a PyMC3 trace object
    '''
    with Model() as unpooled_model:
        intercept = Normal('intercept', 0, sd=1e5, shape=n_levels)
        beta1 = Normal('beta1', 0, sd=1e5)
        beta2 = Normal('beta2', 0, sd=1e5)
        beta3 = Normal('beta3', 0, sd=1e5)
        beta4 = Normal('beta4', 0, sd=1e5)
        sigma = HalfCauchy('sigma', 5)

        theta = (intercept[level] + beta1 * X[:, 0] + beta2 * X[:, 1]
                 + beta3 * X[:, 2] + beta4 * X[:, 3])

        y = Normal('y', theta, sd=sigma, observed=y)

    with unpooled_model:
        unpooled_trace = sample(1000, n_init=50000, tune=1000)

    return unpooled_model, unpooled_trace


def multi_model(X, y, level, n_levels):
    '''
    This function builds a multilevel (partially pooled) model in PyMC3. It will
    only work with four independent variables in the X matrix.
    INPUT:  X - an np array containing a standardized variable matrix with four variables
            y - an np array containing the target values
            level - an array with the level value for each row of the matrix
            n_levels - an INT indicating the number of unique level names
    OUTPUT: multi_model - a PyMC3 model object
            multi_trace - a PyMC3 trace object
    '''
    with Model() as multi_model:
        # set intercept hyper priors
        mu_intercept = Normal('mu_intercept', mu=0., sd=1e5)
        sigma_intercept = HalfCauchy('sigma_intercept', 5)

        # set beta1 hyper priors
        mu_beta1 = Normal('mu_beta1', mu=0., sd=1e5)
        sigma_beta1 = HalfCauchy('sigma_beta1', 5)

        # set beta2 hyper priors
        mu_beta2 = Normal('mu_beta2', mu=0., sd=1e5)
        sigma_beta2 = HalfCauchy('sigma_beta2', 5)

        # set beta3 hyper priors
        mu_beta3 = Normal('mu_beta3', mu=0., sd=1e5)
        sigma_beta3 = HalfCauchy('sigma_beta3', 5)

        # set beta4 hyper priors
        mu_beta4 = Normal('mu_beta4', mu=0., sd=1e5)
        sigma_beta4 = HalfCauchy('sigma_beta4', 5)

        intercept = Normal('intercept', mu=mu_intercept, sd=sigma_intercept, shape=n_levels)
        beta1 = Normal('beta1', mu=mu_beta1, sd=sigma_beta1, shape=n_levels)
        beta2 = Normal('beta2', mu=mu_beta2, sd=sigma_beta2, shape=n_levels)
        beta3 = Normal('beta3', mu=mu_beta3, sd=sigma_beta3, shape=n_levels)
        beta4 = Normal('beta4', mu=mu_beta4, sd=sigma_beta4, shape=n_levels)
        sigma = HalfCauchy('sigma', 5)

        HIV_like = (intercept[level] + beta1[level] * X[:, 0] + beta2[level] * X[:, 1]
                    + beta3[level] * X[:, 2] + beta4[level] * X[:, 3])

        y = Normal('y', HIV_like, sd=sigma, observed=y)

    with multi_model:
        multi_trace = sample(1000, n_init=150000, tune=50000)

    return multi_model, multi_trace


def score_model(model, trace):
    '''Print WAIC and LOO scores (with standard errors) for a fitted model.'''
    waic_score = stats.waic(model=model, trace=trace)
    loo_score = stats.loo(model=model, trace=trace)
    print('WAIC for this model is {} ({})'.format(round(waic_score[0], 2), round(waic_score[1], 2)))
    print('LOO for this model is {} ({})'.format(round(loo_score[0], 2), round(loo_score[1], 2)))


if __name__ == '__main__':
    df = load.load_all_data(2015)
    df_no_zero_outlier = df[((df.HIVincidence > 0) & (df.HIVincidence < 130))].copy()
    us_states, n_states, state = make_levels(df_no_zero_outlier, 'STATEABBREVIATION')
    print(us_states, n_states, state)
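# Hedged usage sketch (not part of the original script): how the pooled model
# could be fitted and scored on small synthetic data. The shapes and coefficient
# values below are assumptions for illustration only.
#
#     import numpy as np
#     X_demo = np.random.randn(200, 4)                              # four standardized predictors
#     y_demo = X_demo @ np.array([1.0, -2.0, 0.5, 3.0]) + np.random.randn(200)
#     pooled, pooled_trace = pooled_model(X_demo, y_demo)
#     score_model(pooled, pooled_trace)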
#ax3.set_ylabel("") ax1.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14) ax2.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14) #ax3.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14) fig.savefig("changeCorrelation.png", dpi=300, bbox_inches='tight') # %% [markdown] # ## Use PyMC3 to compare the difference in correlation # %% # Using Pymc3 import pymc3 as pm from pymc3.glm import GLM with pm.Model() as model_glm: GLM.from_formula('amg_hipp_change ~ groupIdx', dfCors) trace = pm.sample(draws=4000, tune=3000) # %% pm.summary(trace, credible_interval=.95).round(2) # %% # Using Pymc3 - compare antrior hippo and antvmpfc with pm.Model() as model_glm2: GLM.from_formula('hippoAnt_vmpfcAnt_change ~ groupIdx', dfCors) trace2 = pm.sample(draws=4000, tune=2000) # %% pm.summary(trace2, credible_interval=.95).round(2) # %%
import numpy as np
import pandas
import pylab as plt
from scipy.stats import uniform, norm

from pymc3 import Model, sample, summary, traceplot
from pymc3.glm import GLM

# Data
np.random.seed(1056)                         # set seed to replicate example
nobs = 250                                   # number of obs in model
x1 = uniform.rvs(size=nobs)                  # random uniform variable

beta0 = 2.0                                  # intercept
beta1 = 3.0                                  # angular coefficient

xb = beta0 + beta1 * x1                      # linear predictor, xb
y = norm.rvs(loc=xb, scale=1.0, size=nobs)   # create y as adjusted

# Fit
df = pandas.DataFrame({'x1': x1, 'y': y})    # re-write data

with Model() as model_glm:
    GLM.from_formula('y ~ x1', df)
    trace = sample(5000)

# Output
summary(trace)

# show graphical output
traceplot(trace)
plt.show()
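# Optional check (an assumption, not in the original script): GLM.from_formula
# names the coefficients 'Intercept' and 'x1', so their posterior means can be
# compared against the true values beta0 = 2.0 and beta1 = 3.0 used to simulate y.
print('Intercept ~', np.mean(trace['Intercept']), '(true value 2.0)')
print('x1        ~', np.mean(trace['x1']), '(true value 3.0)')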
import os

import matplotlib.pyplot as plt
import pandas
import pymc3 as pm
from pymc3.glm import GLM
from pymc3.glm.families import Binomial


def get_mcmc_betas(train_Ys, train_Xs):
    """
    :return mcmc_betas: (Series) Coefficients of intercept and betas.
    """
    print('train_size:', len(train_Xs))

    # Response plus predictors X_1 ... X_45 (column 0 of train_Xs is not used).
    train_data = pandas.DataFrame(
        {'Y': train_Ys, **{'X_{}'.format(i): train_Xs[:, i] for i in range(1, 46)}}
    )

    # Logistic regression: Y ~ X_1 + X_2 + ... + X_45
    formula = 'Y ~ ' + ' + '.join('X_{}'.format(i) for i in range(1, 46))

    with pm.Model():
        GLM.from_formula(formula, train_data, family=Binomial())
        trace = pm.sample(cores=os.cpu_count())

    summary = pm.summary(trace)
    pm.traceplot(trace)
    plt.savefig('stats/posterior_distribution.png')
    plt.show()

    mcmc_betas = summary['mean']
    return mcmc_betas
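# Hedged usage sketch (not part of the original module): get_mcmc_betas expects a
# binary response and a design matrix whose columns 1..45 hold the predictors
# (column 0 is ignored). The synthetic data below is an assumption for illustration.
#
#     import numpy as np
#     n = 500
#     Xs = np.random.randn(n, 46)
#     logits = 0.5 * Xs[:, 1] - 1.0 * Xs[:, 2]
#     Ys = (np.random.rand(n) < 1.0 / (1.0 + np.exp(-logits))).astype(int)
#     betas = get_mcmc_betas(Ys, Xs)
#     print(betas.head())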