def evaluate(self, model_name, observations, variables, n_iter=1000, chains=4):
    """Draw MCMC samples from one of the stored models using JAGS.

    Parameters
    ----------
    model_name : str
        Key into ``self.models``; the ``'model'`` item of that entry is
        handed to ``pyjags.Model`` as the model definition.
    observations : dict
        Observed variable names (keys) mapped to their values.
    variables : list
        Names of the variables for which samples are collected.
    n_iter : int, optional
        Number of MCMC iterations. [default: 1000]
    chains : int, optional
        Number of MCMC chains. [default: 4]

    Returns
    -------
    samples : dict
        Sample values keyed by variable name, exactly as returned by
        ``pyjags.Model.sample``. For each variable the values are
        organised per iteration and, within an iteration, per chain.
    """
    model_code = self.models[model_name]['model']
    sampler = pyjags.Model(model_code, data=observations, chains=chains)
    return sampler.sample(n_iter, vars=variables)
def aggregate(self, reports):
    """Build the HGCM JAGS model for ``reports`` and start sampling.

    The model definition is read from ``hgcm_model.jags``, located in the
    same directory as this source file. ``reports`` must expose a
    ``shape`` with at least two entries: ``n`` rows and ``m`` columns.
    Each of the three covariate matrices is a single column of ones.
    The compiled model is handed to ``self.run_sampling``; nothing is
    returned.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    model_file = os.path.join(here, 'hgcm_model.jags')

    n = reports.shape[0]
    m = reports.shape[1]

    # One column of ones per covariate matrix (column vectors).
    jags_data = dict(
        Y=reports,
        n=n,
        m=m,
        nrofdeltacov=1,
        nrofgcov=1,
        nrofthetacov=1,
        Xtheta=np.ones((1, n)).T,
        Xg=np.ones((1, n)).T,
        Xdelta=np.ones((1, m)).T,
    )

    model = pyjags.Model(
        file=model_file,
        data=jags_data,
        chains=self.chains,
        adapt=self.adapt,
        progress_bar=self.progress_bar,
        threads=self.threads,
    )
    self.run_sampling(model)
def pyJagsPairedTTest(X, Y, compMu=0, nAdapt=500, nChains=3, nUpdate=100, nIter=5000, thin=1):
    """Fit a robust (Student-t) paired-samples model with JAGS.

    The element-wise differences ``X[i] - Y[i]`` are modelled with a t
    distribution. Prior settings are derived from the mean of the
    differences and from ``mad0`` applied to them.

    Parameters
    ----------
    X, Y : sequences of numbers, paired by position (``zip`` truncates to
        the shorter one).
    compMu : value the mean difference is compared against in the
        effect size.
    nAdapt : adaptation steps passed to ``pyjags.Model``.
    nChains : number of chains.
    nUpdate : iterations run via ``update`` before sampling.
    nIter : iterations passed to ``sample``.
    thin : thinning interval passed to ``sample``.

    Returns
    -------
    The object returned by ``pyjags.Model.sample`` for the variables
    ``mu_diff``, ``sigma_diff``, ``nu``, ``eff_size`` and ``diff_pred``.
    """
    differences = [a - b for a, b in zip(X, Y)]
    spread = mad0(differences)
    centre = mean(differences)

    jags_data = {
        'pair_diff': differences,
        'mean_mu': centre,
        'precision_mu': 1 / (spread**2 * 1000000),
        'sigma_low': spread / 1000,
        'sigma_high': spread * 1000,
        'comp_mu': compMu
    }

    start_values = {
        'mu_diff': centre,
        'sigma_diff': spread,
        'nuMinusOne': 4,
    }

    monitored = ["mu_diff", "sigma_diff", "nu", "eff_size", "diff_pred"]

    paired_samples_t_model_string = """
    model {
      for(i in 1:length(pair_diff)) {
        pair_diff[i] ~ dt( mu_diff , tau_diff , nu )
      }
      diff_pred ~ dt( mu_diff , tau_diff , nu )
      eff_size <- (mu_diff - comp_mu) / sigma_diff

      mu_diff ~ dnorm( mean_mu , precision_mu )
      tau_diff <- 1/pow( sigma_diff , 2 )
      sigma_diff ~ dunif( sigma_low , sigma_high )
      # A trick to get an exponentially distributed prior on nu that starts at 1.
      nu <- nuMinusOne + 1
      nuMinusOne ~ dexp(1/29)
    }
    """

    sampler = pyjags.Model(paired_samples_t_model_string, data=jags_data,
                           chains=nChains, init=start_values, adapt=nAdapt)
    sampler.update(nUpdate)
    return sampler.sample(nIter, vars=monitored, thin=thin)
def sample(self):
    """Compile the model, optionally adapt and burn in, then sample.

    Steps:
      1. Build ``self.pyjags_model`` from ``self.model_spec.get_model_args()``.
      2. If ``self.model_spec.adapt == 'auto'``, call ``adapt(500)``
         repeatedly until it returns a truthy value.
      3. If ``int(self.model_spec.burnin)`` is positive, run one sampling
         pass with the regular sample arguments but with ``vars`` set to
         ``None`` and ``iterations`` set to the burn-in length.
      4. Run the real sampling pass and store it in ``self.sample_data``
         (overwriting whatever the burn-in pass stored there).

    Returns a ``SampleHandler`` wrapping ``self.sample_data``.
    """
    spec = self.model_spec
    self.pyjags_model = pyjags.Model(**spec.get_model_args())

    if spec.adapt == 'auto':
        adapted = False
        while not adapted:
            adapted = self.pyjags_model.adapt(500)

    if int(spec.burnin) > 0:
        warmup_kwargs = spec.get_sample_args()
        warmup_kwargs['vars'] = None
        warmup_kwargs['iterations'] = int(spec.burnin)
        print("Burn-in iterations (no samples recorded):")
        self.sample_data = self.pyjags_model.sample(**warmup_kwargs)

    print("Sample iterations:")
    sample_kwargs = spec.get_sample_args()
    self.sample_data = self.pyjags_model.sample(**sample_kwargs)
    return SampleHandler(self.sample_data)
def infer(  # type: ignore
    self,
    data: xr.Dataset,
    iterations: int,
    num_warmup: int,
    seed: int,
    RNG_name: str = "base::Mersenne-Twister",
) -> xr.Dataset:
    """
    Run one JAGS chain and return the draws, shifted past the warm-up slots.

    See https://phoenixnap.dl.sourceforge.net/project/mcmc-jags/Manuals/4.x/jags_user_manual.pdf
    for JAGS documentation.

    :param data: PPLBench dataset
    :param iterations: total number of draws, warm-up included; only
        ``iterations - num_warmup`` draws are actually sampled
    :param num_warmup: the number of adaptive steps, also the number of
        leading ``draw`` coordinates left without sampled values
    :param seed: seed for random number generator
    :param RNG_name: the name of the random number generator
    :returns: samples dataset
    """
    model = pyjags.Model(
        code=self.impl.get_code(),
        data=self.impl.format_data_to_jags(data),
        chains=1,
        adapt=num_warmup,
        init={".RNG.seed": seed, ".RNG.name": RNG_name},
    )

    num_kept = iterations - num_warmup
    raw = model.sample(num_kept, vars=self.impl.get_vars())

    # Only one chain is run, so drop the trailing chain axis of every array.
    for name, values in raw.items():
        raw[name] = values.squeeze(-1)

    extracted = self.impl.extract_data_from_jags(raw)

    # JAGS does not hand back warm-up draws: move the real draws to start at
    # index num_warmup and merge with a dataset holding only the leading
    # draw coordinates, which pads those positions (with NaN).
    shifted = extracted.assign_coords(draw=extracted.draw + num_warmup)
    warmup_slots = xr.Dataset(coords={"draw": np.arange(num_warmup)})
    return warmup_slots.merge(shifted)
def infer(  # type: ignore
    self,
    data: xr.Dataset,
    num_samples: int,
    seed: int,
    adapt: Optional[int] = None,
    RNG_name: str = "base::Mersenne-Twister",
) -> xr.Dataset:
    """
    Run one JAGS chain and return the extracted draws.

    See https://phoenixnap.dl.sourceforge.net/project/mcmc-jags/Manuals/4.x/jags_user_manual.pdf
    for JAGS documentation.

    :param data: PPLBench dataset
    :param num_samples: number of samples to create
    :param seed: seed for random number generator
    :param adapt: the number of adaptive steps; when ``None`` it is set
        to ``num_samples``
    :param RNG_name: the name of the random number generator
    :returns: samples dataset
    """
    adapt_steps = num_samples if adapt is None else adapt

    model = pyjags.Model(
        code=self.impl.get_code(),
        data=self.impl.format_data_to_jags(data),
        chains=1,
        adapt=adapt_steps,
        init={".RNG.seed": seed, ".RNG.name": RNG_name},
    )
    raw = model.sample(num_samples, vars=self.impl.get_vars())

    # Only one chain is run, so drop the trailing chain axis of every array.
    for name, values in raw.items():
        raw[name] = values.squeeze(-1)

    return self.impl.extract_data_from_jags(raw)
def model(self, *args, **kwargs):
    """Create a ``pyjags.Model`` with ``threads=3``.

    All positional and keyword arguments are forwarded unchanged; passing
    ``threads`` in ``kwargs`` as well raises ``TypeError`` because the
    keyword would be supplied twice.
    """
    instance = pyjags.Model(*args, threads=3, **kwargs)
    return instance
def model(self, *args, **kwargs):
    """Create a ``pyjags.Model`` with ``progress_bar=False``.

    All positional and keyword arguments are forwarded unchanged; passing
    ``progress_bar`` in ``kwargs`` as well raises ``TypeError`` because
    the keyword would be supplied twice.
    """
    instance = pyjags.Model(*args, progress_bar=False, **kwargs)
    return instance
'deltasub': np.random.uniform(-4., 4., size=(nsubs)), 'tersub': np.zeros((nsubs)), 'alphacond': np.random.uniform(.5, 2.), 'deltacond': np.random.uniform(-4., 4.), 'tercond': np.random.uniform(0., .3), 'alphasubsd': np.random.uniform(.01, 1.), 'deltasubsd': np.random.uniform(.01, 3.), 'tersubsd': np.random.uniform(.01, .1) } for j in range(0, nsubs): chaininit['tersub'][j] = np.random.uniform(0., minrt[j] / 2) initials.append(chaininit) print 'Fitting model %i ...' % n threaded = pyjags.Model(file=modelfile, init=initials, data=dict(y=y, N=N, nsubs=nsubs, maxrt=maxrt, subject=subject, Ones=Ones, Constant=10), chains=nchains, adapt=burnin, threads=6, progress_bar=True) samples = threaded.sample(nsamps, vars=trackvars, thin=10) savestring = ('modelfits/trialparam3_test_model%i.mat') % (n + 1) print 'Saving results to: \n %s' % savestring sio.savemat(savestring, samples)
def main():
    """Fit the beta-binomial JAGS model per algorithm and report group accuracy.

    Reads its configuration from the module-level ``args`` and proceeds as
    follows:

    1. Create the output directory ``OUTPUT_DIR + experiment_name`` if it
       does not exist yet.
    2. Load and shuffle the dataset.
    3. For every entry of ``algorithms``, build the model, discard 500
       draws, then keep 200 draws of ``theta``.
    4. Pickle the collected samples to ``samples_run<run_id>.pkl`` in the
       output directory.
    5. For every algorithm and group, print posterior summaries of
       ``theta`` next to the true, empirical and uncalibrated accuracy.
    """
    experiment_name = '%s_%s_budget%d' % (args.dataset, args.attribute, args.budget)
    path = OUTPUT_DIR + experiment_name
    if not os.path.exists(path):
        os.makedirs(path)

    dataset = Dataset.load_from_file(
        DIR + "%s_%s_scores_remapped.csv" % (args.dataset, args.attribute),
        args.dataset)
    print("\n\n\n================%s================" % dataset.dataset_name)
    dataset.shuffle(random_state=args.run_id, attribute=args.attribute)

    n = dataset.df.shape[0]
    vars_list = ['theta']

    # train the model
    samples = dict()
    for algorithm in algorithms:
        print("=================%s=================" % algorithm)
        model = pyjags.Model(code_beta_binomial,
                             data=dict(nl=args.budget,
                                       nc=args.num_groups,
                                       ct=dataset.df[args.attribute][: n] + 1,
                                       s=np.clip(dataset.df['score_' + algorithm][: n], 0.01, 0.99),
                                       y=dataset.df[dataset.class_attr][: n]),
                             chains=4, adapt=1000)
        # First 500 draws are thrown away; only the next 200 are kept.
        model.sample(500, vars=vars_list)
        samples[algorithm] = model.sample(200, vars=vars_list)

    # Use a context manager so the pickle file is flushed and closed even if
    # dumping fails (the handle used to be left open).
    with open(path + '/samples_run%d.pkl' % args.run_id, 'wb') as handle:
        pickle.dump(samples, handle, -1)

    for algorithm in algorithms:
        print("=================%s=================" % algorithm)
        for group_id in range(args.num_groups):
            print("======Group:%d======" % group_id)
            trace = xr.Dataset({k: (("Iteration", "Chain"), v[group_id])
                                for k, v in samples[algorithm].items()})
            print(trace.to_dataframe().mean())
            print(trace.to_dataframe().quantile([0.05, 0.95]))

            # True accuracy
            mask = (dataset.df[args.attribute] == group_id)
            # all instances
            y = dataset.df[dataset.class_attr][:n][mask]
            s = dataset.df['score_' + algorithm][:n][mask]
            predlabel = (s > 0.5) * 1.0
            # confidence in the predicted label, whichever class it is
            s = np.maximum(s, 1 - s)
            # labeled instances
            yl = dataset.df[dataset.class_attr][:args.budget][mask]
            sl = dataset.df['score_' + algorithm][:args.budget][mask]
            predlabell = (sl > 0.5) * 1.0

            sampled_theta = samples[algorithm]['theta'][group_id].flatten()
            mean_theta = np.mean(sampled_theta)
            lb_theta = np.quantile(sampled_theta, 0.025)
            ub_theta = np.quantile(sampled_theta, 0.975)

            print("======== True accuracy: %.2f" % (y == predlabel).mean())
            print("======== Predicted accuracy: %.2f, (%.2f, %.2f)" % (mean_theta, lb_theta, ub_theta))
            print("======== Empirical accuracy: %.2f" % (yl == predlabell).mean())
            print("======== Uncalibrated predicted accuracy:%.2f\n" % (s.mean()))
indextrack += subn # Create dictionary of initial values initials = [] for c in range(0, nchains): chaininit = { 'alphasub': np.random.uniform(.5, 2., size=(nsubs)), 'deltasub': np.random.uniform(-4., 4., size=(nsubs)), 'tersub': np.zeros((nsubs)), 'alphacond': np.random.uniform(.5, 2.), 'deltacond': np.random.uniform(-4., 4.), 'tercond': np.random.uniform(0., .3), 'alphasubsd': np.random.uniform(.01, 1.), 'deltasubsd': np.random.uniform(.01, 3.), 'tersubsd': np.random.uniform(.01, .1) } for j in range(0, nsubs): chaininit['tersub'][j] = np.random.uniform(0., minrt[j] - .1) initials.append(chaininit) print 'Fitting model %i ...' % n threaded = pyjags.Model(file=modelfile, init=initials, data=dict(y=y, N=N, nsubs=nsubs, subject=subject), chains=nchains, adapt=burnin, threads=6, progress_bar=True) samples = threaded.sample(nsamps, vars=trackvars, thin=10) savestring = ('modelfits/trialparam_test_model%i.mat') % (n + 1) print 'Saving results to: \n %s' % savestring sio.savemat(savestring, samples)
if m == 0: threaded = pyjags.Model( code=model, data=dict(IQ=ma.masked_array(IQlarge, mask=np.ones(IQlarge.size, dtype=bool)), ERPdata=ERPlarge, N=N, y=ylarge, person=person, task=task, T=T, I=I, possamps=randsampLength, IQ_lambda=IQ_lambda, IQ_theta=IQ_theta, ERP_lambda=ERP_lambda, ERP_theta=ERP_theta, RT_nu=RT_nu, ERP_psi=ERP_psi, IQ_psi=IQ_psi, RT_psi=RT_psi, RTy_theta=RTy_theta, RT_lambda=RT_lambda, RT_theta=RT_theta, beta=beta), chains=nchains, adapt=burnin, threads=nchains, progress_bar=True) elif m == 1:
'alpha': np.random.uniform(.5, 2., size=(nparts, nconds)), 'problapse': np.random.uniform(.01, .1, size=nparts) } for p in range(0, nparts): for c in range(0, nconds): chaininit['ndt'][p, c] = np.random.uniform(0., minrt[p, c] / 2) initials.append(chaininit) print('Fitting model 3 ...') threaded = pyjags.Model(file=modelfile, init=initials, data=dict(y=y, N=N, regressors1=regressors1, nparts=nparts, nconds=nconds, condition=condition, participant=participant, Ones=Ones, Constant=Constant), chains=nchains, adapt=burnin, threads=6, progress_bar=True) samples = threaded.sample(nsamps, vars=trackvars, thin=10) savestring = ('modelfits/genparam_test1_model3.mat') print('Saving results to: \n %s' % savestring) sio.savemat(savestring, samples) #Diagnostics samples = sio.loadmat(savestring) samples_diagrelevant = samples.copy()
def model(self, *args, **kwargs):
    """Create a ``pyjags.Model`` with ``threads=3`` and ``chains_per_thread=2``.

    All positional and keyword arguments are forwarded unchanged; passing
    either fixed keyword in ``kwargs`` as well raises ``TypeError``
    because it would be supplied twice.
    """
    instance = pyjags.Model(*args, threads=3, chains_per_thread=2, **kwargs)
    return instance
obsx[i] ~ dnorm(x[i], pow(errx[i], -2)) obsy[i] ~ dnorm(y[i], pow(erry[i], -2)) # likelihood function y[i] ~ dnorm(mu[i], tau) mu[i] <- alpha + beta*x[i] # linear predictor } # Prediction for new data for (j in 1:M){ etax[j]<-alpha+beta*xx[j] mux[j] <- etax[j] Yx[j]~dnorm(mux[j],tau) } }""" # Run mcmc model = pyjags.Model(jags_code, data=data, chains=3) samples = model.sample(5000, vars=['alpha', 'beta', 'epsilon', 'mux']) 9 def summary(samples, varname, p=95): values = samples[varname] ci = np.percentile(values, [100-p, p]) print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.format( varname, np.mean(values), p, *ci)) for varname in ['alpha', 'beta', 'epsilon']: summary(samples, varname) # get Gaussian fit
# sampler settings
Nsamples = 1000  # number of iterations of the sampler
chains = 4  # number of chains to run with

# values substituted into the line_code_jags template
linedict = {
    'mmu': 0.0,  # mean of Gaussian prior distribution for m
    'minvvar': 1 / 10**2,  # inverse variance of Gaussian prior distribution for m
    'clower': -10,  # lower bound on uniform prior distribution for c
    'cupper': 10,  # upper bound on uniform prior distribution for c
    'invvar': 1 / sigma**2,  # inverse variance of the data
}

# compile the model from the filled-in template, then draw the samples
model = pyjags.Model(line_code_jags.format(**linedict),
                     data=datadict,
                     chains=chains)
samples = model.sample(Nsamples, vars=['m', 'c'])

# pool the draws of each parameter across chains into flat vectors
mchainjags = samples['m'].flatten()
cchainjags = samples['c'].flatten()

# one row per draw, columns (m, c)
postsamples = np.vstack((mchainjags, cchainjags)).T

# plotting the posterior needs corner.py; stop with exit status 1 without it
try:
    import corner
except ImportError:
    sys.exit(1)
} """ N = 1000 mu, sigma = 5, .3 x = np.random.randn(N) * sigma + mu adapt = 100 burn = 100 nchains = 3 nsteps = 500 thin = 1 niter = int(np.ceil((nsteps * thin) / float(nchains))) data = {'x': x, 'N': N} mod = pyjags.Model(modstr, data, nchains=nchains, inits={ 'mu': np.mean(x), 'sigma': np.std(x) }) mod.burnin(100) ms = mod.sample(niter=niter, thin=thin) print "Real mu (sigma)=%.2f (%.2f)" % (mu, sigma) print "Estimated (means):" print np.mean(ms, 0) print "Estimated (5-95 percentile):" print np.percentile(ms, q=[5, 95], axis=0).T print mod.gelman_diagnostic() print mod.dic()
code = """ model { for (i in 1:Ntotal) { y[i] ~ dbern(theta[s[i]]) } for (s in 1:Nsubj) { theta[s] ~ dbeta(2,2) } } """ num_chains = 3 # model = pyjags.Model(code, data=dict(s=s, Ntotal=n_total, Nsubj=n_subject), chains=num_chains) model = pyjags.Model(code, data=dict(y=y, s=s, Ntotal=n_total, Nsubj=n_subject), chains=num_chains) model.update(500) samples = model.sample(20000, vars=['theta']) print(np.shape(samples['theta'])) summary(samples['theta'][0], 'theta[1]') summary(samples['theta'][1], 'theta[2]') difference = samples['theta'][0] - samples['theta'][1] print(np.shape(difference)) summary(difference, 'theta[1] - theta[2]') plt.hist(samples['theta'][0].flatten(), 50,
def model(self, *args, **kwargs):
    """Return a fresh ``pyjags.Model`` built from the given arguments.

    Positional and keyword arguments are forwarded unchanged.
    """
    instance = pyjags.Model(*args, **kwargs)
    return instance
# Simulate N points scattered around the line a + b * x with noise sd `sigma`.
sigma = 50
x = np.random.uniform(0, 100, size=N)
y = np.random.normal(a + x * b, sigma, size=N)

# JAGS parameterises dnorm by precision, hence tau = 1 / sigma^2.
code = '''
model {
    for (i in 1:N) {
        y[i] ~ dnorm(alpha + beta * x[i], tau)
    }
    alpha ~ dunif(-1e3, 1e3)
    beta ~ dunif(-1e3, 1e3)
    tau <- 1 / sigma^2
    sigma ~ dgamma(1e-4, 1e-4)
}
'''

model = pyjags.Model(code, data=dict(x=x, y=y, N=N), chains=4)
samples = model.sample(5000, vars=['alpha', 'beta', 'sigma'])


def summary(samples, varname, p=95):
    """Print the mean and the equal-tailed ``p``% credible interval.

    Parameters
    ----------
    samples : mapping from variable name to an array of draws.
    varname : name of the variable to summarise.
    p : coverage of the interval in percent.

    The interval leaves ``(100 - p) / 2`` percent of the draws in each
    tail, so the default reports the 2.5th and 97.5th percentiles. (Using
    the percentiles ``100 - p`` and ``p`` would cover only ``2p - 100``
    percent, i.e. 90% for ``p=95``.)
    """
    values = samples[varname]
    tail = (100 - p) / 2.0
    ci = np.percentile(values, [tail, 100 - tail])
    print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.
          format(varname, np.mean(values), p, *ci))


for varname in ['alpha', 'beta', 'sigma']:
    summary(samples, varname)
def sample(self):
    """Compile the model described by ``self.model_spec`` and sample from it.

    The compiled model is kept in ``self.pyjags_model`` and the raw
    sampling result in ``self.sample_data``. Returns a ``SampleHandler``
    wrapping that result.
    """
    spec = self.model_spec
    self.pyjags_model = pyjags.Model(**spec.get_model_args())
    sample_kwargs = spec.get_sample_args()
    self.sample_data = self.pyjags_model.sample(**sample_kwargs)
    return SampleHandler(self.sample_data)
def time_jags():
    """Build and return a single-chain ``pyjags.Model`` with no adaptation.

    Uses the module-level ``model`` definition and ``model_data``.
    """
    compiled = pyjags.Model(model, data=model_data, chains=1, adapt=0)
    return compiled
# Fit the JAGS model in `modelfile` and save the samples to a .mat file.
# NOTE(review): Python 2 print statements; every input name used here is
# defined outside this section.
print 'Fitting model %s ...' % (modelname + timestart)
# Data handed to JAGS; the same dict is bound to both `indata` and `data`.
indata = data = dict(N=N, y=y, nses=nses, nconds=nconds, maxrt=maxrt, Ones=Ones, Constant=Constant, EEGsession=sessioncount, condition=condition, n200lat=n200lat, experiment=experiment, nexps=2)
# `burnin` is passed as the number of adaptation steps; 6 threads are used.
threaded = pyjags.Model(file=modelfile, init=initials, data=indata, chains=nchains, adapt=burnin, threads=6, progress_bar=True)
# Draw nsamps iterations of the tracked variables with a thinning interval of 10.
samples = threaded.sample(nsamps, vars=trackvars, thin=10)
# Output file name: jagsmodel_<modelname><timestart>.mat
savestring = 'jagsmodel_' + \
    modelname + timestart + ".mat"
print 'Saving results to: \n %s' % savestring
sio.savemat(savestring, samples)
{ # Likelihood for (t in 1:T) { y[t] ~ dbin(p[t], K) logit(p[t]) <- alpha + beta_1 * x_1[t] + beta_2 * x_2[t] } # Priors alpha ~ dnorm(0.0,0.01) beta_1 ~ dnorm(0.0,0.01) beta_2 ~ dnorm(0.0,0.01) } """ # Set up the data model = pyjags.Model(code, data=dict(T = T, y = y, x_1 = x_1, x_2 = x_2, K = 1)) # Number of iterations to remove at start model.sample(200, vars=[]) # Choose the parameters to watch and iterations: samples = model.sample(1000, vars=['alpha', 'beta_1', 'beta_2']) """ Simulated results ---------------------------------------------------------------- """ def summary(samples, varname, p=95): values = samples[varname] ci = np.percentile(values, [100-p, p]) print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.format( varname, np.mean(values), p, *ci))
np.random.uniform(0, 1., size=3) } initials.append(chaininit) # Run JAGS model # Choose JAGS model type print 'Finding posterior predictives with %s model ...' % modelname threaded = pyjags.Model(code=model, init=initials, data=dict(IQ=IQdata, ERPdata=ERPdata, N=N, y=y, person=person, task=task, T=T, I=I), chains=nchains, adapt=burnin, threads=nchains, progress_bar=True) samples = threaded.sample(nsamps, vars=trackvars, thin=thin) savestring = '../Results/' + \ modelname + ".mat" print 'Saving %s results to: \n %s' % (modelname, savestring) S sio.savemat(savestring, samples)
code = """ model { for (i in 1:N) { y[i] ~ dbern(theta) # likelihood } theta ~ dbeta(omega[m] * (kappa - 2) + 1, (1 - omega[m]) * (kappa - 2) + 1) omega[1] <- .25 omega[2] <- .75 kappa <- 12 m ~ dcat(mPriorProb[]) mPriorProb[1] <- .5 mPriorProb[2] <- .5 } """ model = pyjags.Model(code, data=dict(y=y, N=n), chains=3, adapt=500) model.update(500) samples = model.sample(3334, vars=['theta', 'm']) samples_flatten = dict([(k, v.flatten()) for k, v in samples.items()]) df_samples = pd.DataFrame.from_dict(samples_flatten) theta_m1 = df_samples[df_samples['m'] == 1]['theta'].tolist() theta_m2 = df_samples[df_samples['m'] == 2]['theta'].tolist() fig, (ax1, ax2, ax3) = plt.subplots(1, 3) ax1.hist(df_samples['theta'].tolist(), 50, density=True,
mu[1] <- Y[1] # Likelihood function for (t in 2:N) { Y[t] ~ dnorm(mu[t],tau) mu[t] <- phi[1] + phi[2] * Y[t-1] } # Prediction for (t in 1:N){ Yx[t]~dnorm(mu[t],tau) } }""" # Run mcmc model = pyjags.Model(AR1_NORM, data=data, chains=3) samples = model.sample(5000, vars=['sd', 'phi', 'Yx']) def summary(samples, varname, p=95): if varname == 'phi': for k in range(2): values = samples[varname][k] ci = np.percentile(values, [100-p, p]) print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.format( varname[k], np.mean(values), p, *ci)) else: values = samples[varname] ci = np.percentile(values, [100-p, p]) print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.format(
# map subject to [1,n_group] s_map_new_old = {} for si, s_name in enumerate(s_list): s_map_new_old[si + 1] = s_name s_map_old_new = {} for si, s_name in enumerate(s_list): s_map_old_new[s_name] = si + 1 s = np.array([s_map_old_new[si] for si in s]) data = dict(y=y, x=x, s=s, Nsubj=n_group, Ntotal=len(y)) model = pyjags.Model(code, data=data, chains=n_chains, adapt=burn_in, threads=n_threads) parameters = ["beta0", "beta1", "beta0mu", "sigma", "nu"] samples = model.sample(n_iter, vars=parameters) ######################### ##### Calculate HDI ##### results = [] for i in range(n_group): d_name = s_map_new_old[i + 1] beta0 = samples['beta0'][i].reshape(n_iter * n_chains, ) beta0_mode = np.mean(pymc.utils.hpd(beta0, 0.99)) beta0_HDI = pymc.utils.hpd(beta0, 1 - (p / 100.)) beta0_ESS = pymc.diagnostics.effective_n(samples['beta0'][i])
for (i in 1:N){ Y[i] ~ dnorm(mu[i], tau) mu[i] <- eta[i] eta[i] <- beta0 + beta1 * X[i] } # Prediction for new data for (j in 1:M){ etax[j] <- beta0 + beta1 * xx[j] mux[j] <- etax[j] Yx[j] ~ dnorm(mux[j],tau) } }""" model = pyjags.Model(NORM, data=toy_data, chains=3) samples = model.sample(5000, vars=['beta0', 'beta1', 'sigma', 'Yx', 'mux']) def summary(samples, varname, p=95): values = samples[varname] ci = np.percentile(values, [100-p, p]) print('{:<6} mean = {:>5.1f}, {}% credible interval [{:>4.1f} {:>4.1f}]'.format( varname, np.mean(values), p, *ci)) for varname in ['beta0', 'beta1', 'sigma']: summary(samples, varname) # get Gaussian fit beta0_mean, beta0_std = norm.fit(samples['beta0'][0][:,0])
# Bernoulli likelihood with a flat Beta(1, 1) prior on theta.
code = """
model {
    for (i in 1:N) {
        y[i] ~ dbern(theta) # likelihood
    }
    theta ~ dbeta(1, 1) # prior
}
"""


def generate_init_theta():
    """Return a starting value for theta from a random resample of ``y``.

    A resample size is drawn from ``[1, n)``, that many elements of ``y``
    are drawn with ``np.random.choice``, and their mean is mapped into
    ``[0.001, 0.999]`` so the start value is never exactly 0 or 1.
    """
    resample_size = np.random.randint(1, high=n)
    resampled_y = np.random.choice(y, resample_size)
    proportion = sum(resampled_y) / len(resampled_y)
    # keep away from 0, 1
    return 0.001 + 0.998 * proportion


num_chains = 3

# one independent starting point per chain
init_thetas = []
for _ in range(num_chains):
    init_thetas.append(dict(theta=generate_init_theta()))

# Initialize models
model = pyjags.Model(code,
                     data=dict(y=y, N=n),
                     init=init_thetas,
                     chains=num_chains,
                     adapt=500)
# extra 500 iterations before any draws are recorded
model.update(500)
samples = model.sample(3334, vars=['theta'])

theta_samples = samples['theta']
print(theta_samples)
print(np.shape(theta_samples))
summary(theta_samples, 'theta')