# Random Y values generated from linear model with true parameter values: y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, n_data) # Select which predictors to include include_only = list(range(0, n_predictors)) # default is to include all #x = x.iloc[include_only] predictor_names = x.columns n_predictors = len(predictor_names) # THE MODEL with pm.Model() as model: # define hyperpriors muB = pm.Normal('muB', 0,.100 ) tauB = pm.Gamma('tauB', .01, .01) udfB = pm.Uniform('udfB', 0, 1) tdfB = 1 + tdfBgain * (-pm.log(1 - udfB)) # define the priors tau = pm.Gamma('tau', 0.01, 0.01) beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12) beta1 = pm.T('beta1', mu=muB, lam=tauB, nu=tdfB, shape=n_predictors) mu = beta0 + pm.dot(beta1, x.values.T) # define the likelihood #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1] yl = pm.Normal('yl', mu=mu, tau=tau, observed=y) # Generate a MCMC chain start = pm.find_MAP() step1 = pm.NUTS([beta1]) step2 = pm.Metropolis([beta0, tau, muB, tauB, udfB]) trace = pm.sample(10000, [step1, step2], start, progressbar=False)
# Re-center data at mean, to reduce autocorrelation in MCMC sampling. # Standardize (divide by SD) to make initialization easier. x_m = np.mean(x) x_sd = np.std(x) y_m = np.mean(y) y_sd = np.std(y) zx = (x - x_m) / x_sd zy = (y - y_m) / y_sd tdf_gain = 1 # 1 for low-baised tdf, 100 for high-biased tdf # THE MODEL with pm.Model() as model: # define the priors udf = pm.Uniform('udf', 0, 1) tdf = 1 - tdf_gain * pm.log(1 - udf) # tdf in [1,Inf). tau = pm.Gamma('tau', 0.001, 0.001) beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12) beta1 = pm.Normal('beta1', mu=0, tau=1.0E-12) mu = beta0 + beta1 * zx # define the likelihood yl = pm.T('yl', mu=mu, lam=tau, nu=tdf, observed=zy) # Generate a MCMC chain start = pm.find_MAP() step = pm.Metropolis() trace = pm.sample(20000, step, start, progressbar=False) # EXAMINE THE RESULTS burnin = 1000 thin = 10
# Robust multiple linear regression fit with MCMC (old PyMC3-style API:
# pm.T / pm.log / pm.dot — NOTE(review): these names are from an early PyMC3
# release; modern PyMC uses pm.StudentT / pm.math.log / pm.math.dot — confirm
# against the pinned pymc version before running.
# Assumes x (DataFrame), beta_true, sd_true, n_data, n_predictors, tdfBgain,
# np, norm, pm are all defined earlier in the file — not visible in this chunk.

# Random Y values generated from linear model with true parameter values:
y = np.sum(x * beta_true[1:].T, axis=1) + beta_true[0] + norm.rvs(0, sd_true, n_data)

# Select which predictors to include.
# Fix: materialize the range as a list, consistent with the sibling copy of
# this script earlier in the file; a concrete list is what the commented-out
# .iloc selection below expects to reuse.
include_only = list(range(0, n_predictors))  # default is to include all
#x = x.iloc[include_only]
predictor_names = x.columns
n_predictors = len(predictor_names)

# THE MODEL
with pm.Model() as model:
    # define hyperpriors: muB/tauB are the shared location/precision of the
    # regression coefficients; tdfB is the t-distribution's normality parameter
    muB = pm.Normal('muB', 0, .100)
    tauB = pm.Gamma('tauB', .01, .01)
    udfB = pm.Uniform('udfB', 0, 1)
    # exponential-style transform: tdfB in [1, Inf); tdfBgain biases it
    # toward low (1) or high (100) normality
    tdfB = 1 + tdfBgain * (-pm.log(1 - udfB))
    # define the priors (tau = noise precision; beta0 = intercept with a
    # very vague normal prior; beta1 = coefficients shrunk toward muB)
    tau = pm.Gamma('tau', 0.01, 0.01)
    beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12)
    beta1 = pm.T('beta1', mu=muB, lam=tauB, nu=tdfB, shape=n_predictors)
    mu = beta0 + pm.dot(beta1, x.values.T)
    # define the likelihood
    #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1]
    yl = pm.Normal('yl', mu=mu, tau=tau, observed=y)
    # Generate a MCMC chain: NUTS for the coefficient vector, Metropolis
    # for the scalar parameters, starting from the MAP estimate
    start = pm.find_MAP()
    step1 = pm.NUTS([beta1])
    step2 = pm.Metropolis([beta0, tau, muB, tauB, udfB])
    trace = pm.sample(10000, [step1, step2], start, progressbar=False)