def run(self, sample_iters, burning_iters,
        init=None,
        chains=None, cores=None,
        step=None, nuts_kwargs=None,
        *args, **kwargs):
    """
    Performs the MCMC run.

    The sampling happens inside the ``self.Model`` context and the draws are
    written through a ``pm.backends.Text`` backend rooted at ``self.dir_out``.

    Arguments:
    sample_iters (integer):    Number of MCMC iterations.
    burning_iters (integer):   Number of burning iterations.
    init:                      Forwarded to ``pm.sample`` (non-GMM branch only).
    chains, cores:             Forwarded to ``pm.sample`` in both branches.
    step:                      Accepted for interface compatibility. In the GMM /
                               non-central branch it is replaced by an
                               ``ElemwiseCategorical`` step; in the other branch
                               it is not forwarded.
    nuts_kwargs:               Forwarded to ``pm.sample`` (non-GMM branch only).
    *args, **kwargs:           Forwarded to ``pm.sample`` (non-GMM branch only).

    Returns:
    The trace object returned by ``pm.sample``.
    """
    print("Computing posterior")

    with self.Model as model:
        db = pm.backends.Text(self.dir_out)

        # Compare by value: ``is`` against a string literal tests object
        # identity and is only true by accident of interning.
        if self.prior == "GMM" and self.parametrization == "non-central":
            # The discrete component indicator gets its own categorical step.
            step = pm.ElemwiseCategorical(vars=[model.component], values=[0, 1])
            trace = pm.sample(draws=sample_iters,
                              tune=burning_iters,
                              trace=db,
                              chains=chains, cores=cores,
                              discard_tuned_samples=True,
                              step=[step])
        else:
            trace = pm.sample(draws=sample_iters,
                              tune=burning_iters,
                              init=init,
                              trace=db,
                              nuts_kwargs=nuts_kwargs,
                              chains=chains, cores=cores,
                              discard_tuned_samples=True,
                              *args, **kwargs)

    # Hand the trace back to the caller instead of discarding it.
    return trace
def get_samples(coverage_distribution, estimated_haploid_cov, number_of_iterations, burn_period):
    """
    Sample a seven-component Gaussian mixture fitted to a coverage distribution.

    Model, as built below:
      * ``p``        - Dirichlet mixture weights with a flat (all-ones) prior.
      * ``c1``       - Uniform within +/-20% of ``estimated_haploid_cov``.
      * component means are ``c1 * 1 .. c1 * 7``.
      * a potential that contributes ``-inf`` when the first three means are
        not in increasing order.
      * ``sds``      - per-component Uniform(0, ``estimated_haploid_cov / 2``).
      * ``category`` - one Categorical label per observation.
      * ``obs``      - Normal likelihood observed at ``coverage_distribution``.

    Arguments:
    coverage_distribution:  observed values; must support ``len()``.
    estimated_haploid_cov:  centre of the prior on ``c1`` and scale for ``sds``.
    number_of_iterations:   total iterations, tuning included.
    burn_period:            iterations used for tuning.

    Returns:
    The trace returned by ``pm.sample``, holding
    ``number_of_iterations - burn_period`` requested draws.
    """
    K = 7
    halfwidth_of_uniform = 0.2

    __gc.collect()

    model = __pm.Model()
    with model:
        p = __pm.Dirichlet('p', a=__np.ones(K), shape=K)
        c1 = __pm.Uniform(
            'c1',
            (1 - halfwidth_of_uniform) * estimated_haploid_cov,
            (1 + halfwidth_of_uniform) * estimated_haploid_cov)
        # Component k is centred at k times the haploid coverage.
        means = __tt.stack([c1] + [c1 * k for k in range(2, K + 1)])
        order_means_potential = __pm.Potential(
            'order_means_potential',
            __tt.switch(means[1] - means[0] < 0, -__np.inf, 0)
            + __tt.switch(means[2] - means[1] < 0, -__np.inf, 0))
        sds = __pm.Uniform('sds', lower=0, upper=estimated_haploid_cov / 2, shape=K)
        category = __pm.Categorical('category', p=p, shape=len(coverage_distribution))
        points = __pm.Normal('obs', mu=means[category], sd=sds[category],
                             observed=coverage_distribution)

    with model:
        step1 = __pm.Metropolis(vars=[p, sds, means])
        step2 = __pm.ElemwiseCategorical(vars=[category], values=list(range(K)))
        __logging.getLogger("pymc3").setLevel(__logging.WARNING)
        # The keyword is ``draws``; the previous ``draw=`` spelling was not
        # bound to that parameter, so the requested length was ignored.
        tr = __pm.sample(draws=number_of_iterations - burn_period,
                         tune=burn_period,
                         step=[step1, step2],
                         progressbar=False,
                         verbose=0,
                         compute_convergence_checks=False)
    return tr
def run_mv_model(data, K=3, n_feats=2, mus=None, mc_samples=10000, jobs=1):
    """
    Build and sample a K-component multivariate-normal mixture.

    The component means get an MvNormal prior whose precision is the
    deterministic ``tau`` (10 times the identity); the likelihood uses an
    identity precision. Only an ``ElemwiseCategorical`` step for the labels
    is passed to ``sample``.

    Arguments:
    data:        observations; ``len(data)`` gives the number of labels.
    K:           number of mixture components.
    n_feats:     dimensionality used for the precision matrices and means.
    mus:         prior mean of the component means (``0.`` when ``None``).
    mc_samples:  number of draws passed to ``sample``.
    jobs:        forwarded to ``sample`` as ``n_jobs``.

    Returns:
    ``(model, mod, trace)`` where ``mod`` is ``stats.mode`` of the sampled
    ``category`` values from index ``int(mc_samples * 0.75)`` onwards.

    Side effects: draws a trace plot and sets the current figure title.
    """
    n_samples = len(data)
    prior_mean = 0. if mus is None else mus

    with pm.Model() as model:
        precision = pm.Deterministic('tau', pm.floatX(tt.eye(n_feats) * 10))
        component_means = MvNormal('mus', mu=prior_mean, tau=precision,
                                   shape=(K, n_feats))
        weights = Dirichlet('pi', a=pm.floatX([1.] * K), shape=K)
        category = pm.Categorical('category', p=weights, shape=n_samples)
        pm.MvNormal('x', mu=component_means[category], tau=tt.eye(n_feats),
                    observed=data)

        label_step = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, label_step, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'tau'])
    plt.title('mv model')

    # Summarise the labels using the last quarter of the draws only.
    tail_start = int(mc_samples * 0.75)
    mod = stats.mode(trace['category'][tail_start:])
    return model, mod, trace
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):
    """
    Build and sample a K-component mixture of multivariate normals that
    share one covariance, parameterised by an LKJ Cholesky factor.

    Arguments:
    data:        2-D array; ``data.shape`` gives (samples, features).
    K:           number of mixture components.
    mus:         prior mean of the component means (``0.`` when ``None``).
    mc_samples:  number of draws passed to ``sample``.
    jobs:        forwarded to ``sample`` as ``n_jobs``.

    Returns:
    ``(model, mod, trace)`` where ``mod`` is ``stats.mode`` of the sampled
    ``category`` values from index ``int(mc_samples * 0.75)`` onwards.

    Side effects: draws a trace plot and sets the current figure title.
    """
    n_samples, n_feats = data.shape
    prior_mean = 0. if mus is None else mus

    with pm.Model() as model:
        packed_chol = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                        sd_dist=pm.HalfCauchy.dist(2.5))
        chol = pm.expand_packed_triangular(n_feats, packed_chol)
        # Expose the full covariance so it can be traced and plotted.
        pm.Deterministic('Sigma', chol.dot(chol.T))

        component_means = pm.Normal('mus', mu=prior_mean, sd=10.,
                                    shape=(K, n_feats),
                                    testval=data.mean(axis=0))
        weights = Dirichlet('pi', a=pm.floatX([1.] * K), shape=K)
        # TODO one pi per voxel
        category = pm.Categorical('category', p=weights, shape=n_samples)
        pm.MvNormal('x', mu=component_means[category], chol=chol,
                    observed=data)

        label_step = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, label_step, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')

    # Summarise the labels using the last quarter of the draws only.
    tail_start = int(mc_samples * 0.75)
    mod = stats.mode(trace['category'][tail_start:])
    return model, mod, trace
nsites = data.shape[1] #without scaling model = pm.Model() with model: pi = pm.Dirichlet('pi', a=np.array([alphaprime]*nclusters),shape=nclusters) # Define priors pk = pm.Beta('pk', 1,1,shape=(nclusters,nsites)) z = pm.Categorical("z",p=pi,shape=ncells) # Define likelihood likelihood = pm.Bernoulli('likelihood',p=pk[z],observed=data,shape=(ncells)) with model: step1 = pm.Metropolis(vars=[pk, pi, alpha]) step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2]) tr = pm.sample(100, step=[step1, step2]) traceplot(trace) #attempt 1: specified as in BISCUIT model = pm.Model() with model: # model specifications in PyMC3 are wrapped in a with-statement pi = pm.Dirichlet('p', a=np.array([alphaprime]*nclusters)) # Define priors pk = Beta('pk', 1,1,shape=nclusters) alpha = Beta('alpha',1,.1,shape=ncells)
sd=np.array([10, 10]), shape=2)
# ^ tail of a call that starts before this fragment.
# NOTE(review): the statements down to the `observations` likelihood
# presumably sit inside a model context opened before this fragment -
# restore the matching indentation when merging.

# Per-observation centre and standard deviation, selected by `assignment`.
center_i = pm.Deterministic('center_i', centers[assignment])
sd_i = pm.Deterministic('sd_i', sds[assignment])

# and to combine it with the observations:
observations = pm.Normal("obs", mu=center_i, sd=sd_i, observed=data)

# Show the first four test values of each per-observation quantity.
print("Random assignments: ", assignment.tag.test_value[:4], "...")
print("Assigned center: ", center_i.tag.test_value[:4], "...")
print("Assigned standard deviation: ", sd_i.tag.test_value[:4])

with model:
    # Metropolis for the continuous parameters, a categorical step for the
    # discrete assignments.
    step1 = pm.Metropolis(vars=[p, sds, centers])
    step2 = pm.ElemwiseCategorical(vars=[assignment])
    trace = pm.sample(25000, step=[step1, step2])

#figsize(12.5, 9)
plt.subplot(311)
lw = 1
center_trace = trace["centers"]

# for pretty colors later in the book.
# The colour order follows whichever centre is larger in the last draw.
colors = ["#348ABD", "#A60628"] if center_trace[-1, 0] > center_trace[-1, 1] \
    else ["#A60628", "#348ABD"]

plt.plot(center_trace[:, 0], label="trace of center 0", c=colors[0], lw=lw)
plt.plot(center_trace[:, 1], label="trace of center 1", c=colors[1], lw=lw)
plt.title("Traces of unknown parameters")
leg = plt.legend(loc="upper right")
# Script fragment: simulate a Gaussian mixture, fit it, and overlay
# posterior-predictive densities on the data density.
# `means`, `std_devs`, `n_cluster`, `clusters` and `n_total` are read here
# but assigned outside this fragment.
mix = np.random.normal(np.repeat(means, n_cluster), np.repeat(std_devs, n_cluster))

with pm.Model() as model_ug:
    # Each observation is assigned to a cluster/component with probability p
    p = pm.Dirichlet('p', a=np.ones(clusters))
    category = pm.Categorical('category', p=p, shape=n_total)

    # We estimate the unknown gaussians means and standard deviation
    # Note: this rebinds `means`, which was used above to simulate `mix`.
    # NOTE(review): the prior means are a fixed three-element list while the
    # shape is `clusters` - confirm `clusters` is 3.
    means = pm.Normal('means', mu=[10, 20, 35], sd=2, shape=clusters)
    sd = pm.HalfCauchy('sd', 5)

    y = pm.Normal('y', mu=means[category], sd=sd, observed=mix)

    step1 = pm.ElemwiseCategorical(vars=[category], values=range(clusters))
    step2 = pm.Metropolis(vars=[means, sd, p])
    # NOTE(review): `nchains` may be a deprecated spelling of `chains` in the
    # installed PyMC3 - verify.
    trace_ug = pm.sample(10000, step=[step1, step2], nchains=1)

# Drop the first 1000 draws before plotting.
chain_ug = trace_ug[1000:]
pm.traceplot(chain_ug)

plt.figure()
# NOTE(review): `sample_ppc` may have been renamed in newer PyMC3 releases -
# verify against the installed version.
ppc = pm.sample_ppc(chain_ug, 50, model_ug)
for i in ppc['y']:
    sns.kdeplot(i, alpha=0.1, color='b')

sns.kdeplot( np.array(mix), lw=2, color='k') # you may want to replace this with the posterior mean
plt.xlabel('$x$', fontsize=14)
# Script fragment: second set of Beta parameters, the model-index switch,
# sampling, and a first look at the model-index trace.
# NOTE(review): the statements down to `pm.sample` presumably sit inside a
# model context opened before this fragment - restore the matching
# indentation when merging. `mu`, `kappa0`, `kappa1`, `a_Beta0`, `b_Beta0`,
# `model_index`, `cond_of_subj`, `n_subj`, `n_trl_of_subj` and
# `n_corr_of_subj` are assigned outside this fragment.
a_Beta1 = mu[cond_of_subj] * kappa1[cond_of_subj]
b_Beta1 = (1 - mu[cond_of_subj]) * kappa1[cond_of_subj]

#Prior on theta
theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj)
theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj)
# if model_index == 0 then sample from theta1 else sample from theta0
theta = pm.switch(pm.eq(model_index, 0), theta1, theta0)

# Likelihood:
y = pm.Binomial('y', p=theta, n=n_trl_of_subj, observed=n_corr_of_subj)

# Sampling
# One step method per group of variables: Metropolis for the
# hyper-parameters, NUTS for the thetas, a categorical step for the index.
step1 = pm.Metropolis([kappa0, kappa1, mu])
step2 = pm.NUTS([theta0, theta1])
step3 = pm.ElemwiseCategorical(vars=[model_index],values=[0,1])
trace = pm.sample(5000, step=[step1, step2, step3], progressbar=False)

# EXAMINE THE RESULTS.
burnin = 500
pm.traceplot(trace)

# Discard the first `burnin` draws of the model index.
model_idx_sample = trace['model_index'][burnin:]

# pM1 is the fraction of retained draws where the index equals 1.
pM1 = sum(model_idx_sample == 1) / len(model_idx_sample)
pM2 = 1 - pM1

plt.figure(figsize=(15, 15))
plt.subplot2grid((5,4), (0,0), colspan=4)
plt.plot(model_idx_sample, label='p(M1|D) = %.3f ; p(M2|D) = %.3f' % (pM1, pM2));
# z is the component that the data point is being sampled from. # Since we have N data points, z should be a vector with N elements. z = pm.Categorical('z', p=p, shape=N) # Prior over the component means and standard deviations mu = pm.Normal('mu', mu=0., sd=10., shape=K) sigma = pm.HalfCauchy('sigma', beta=1., shape=K) # Specify the likelihood Y_obs = pm.Normal('Y_obs', mu=mu[z], sd=sigma[z], observed=y) ## Run sampler with gmm: # Specify the sampling algorithms to use step1 = pm.NUTS(vars=[p, mu, sigma]) step2 = pm.ElemwiseCategorical(vars=[z]) # Start the sampler! trace = pm.sample(draws=2000, chains=4, step=[step1, step2], random_seed=seed) # Plot results pm.traceplot(trace, var_names=['mu','p','sigma'], # Specify which variables to plot lines=[('mu',{},mus),('p',{},ps),('sigma',{},sigmas)], # Plots straight lines - useful for simulations compact=True #Don't split up variable plots by group ) plt.show()
def run_normal_mv_model_mixture_DIY(data, K=3, mus=None, mc_samples=10000, jobs=1, n_cols=10, n_rows=100, neigs=1):
    """
    Sample a K-component multivariate-normal mixture with a hand-written
    log-likelihood that includes a neighbourhood ("spatial") term.

    Each component has its own LKJ Cholesky covariance; every sample has its
    own Dirichlet weight vector. The labels are sampled with an
    ``ElemwiseCategorical`` step.

    Arguments:
    data:        2-D array; only ``data.shape[1]`` (features) is taken from
                 its shape. It is also the observed value of the likelihood.
    K:           number of mixture components.
    mus:         accepted for interface compatibility; not used (the prior
                 mean of the component means is fixed at 100).
    mc_samples:  number of draws passed to ``sample``.
    jobs:        accepted for interface compatibility; not used.
    n_cols, n_rows:  grid size; the number of samples is ``n_cols * n_rows``.
    neigs:       neighbourhood radius passed to ``indxs_neigs``; the number
                 of neighbour slots per sample is ``4 * neigs * (neigs + 1)``.

    Returns:
    ``(model, mod, trace)`` where ``mod`` is ``stats.mode`` of the sampled
    ``category`` values from index ``int(mc_samples * 0.75)`` onwards.

    Side effects: draws a trace plot and sets the current figure title.
    """

    def logp_simple(mus, category, aux3):
        """Return the log-density closure handed to ``DensityDist``.

        ``mus`` is the list of per-component distributions, ``category`` the
        label vector and ``aux3`` the (n_samples, max_neigs) matrix of
        neighbour labels.
        """

        def logp_(value):
            # With a zero factor the neighbourhood terms contribute nothing.
            spatial_factor = 0.00
            ones = tt.ones((n_samples, ))
            logps = tt.zeros((n_samples))
            sumlogps = tt.zeros((K, n_samples))
            # Share of neighbour slots carrying the sample's own label.
            # NOTE(review): the divisor 8.0 equals max_neigs only when
            # neigs == 1 - confirm before using other radii.
            pi = tt.sum(tt.eq(aux3, (ones * category).reshape((n_samples, 1))), axis=1) / 8.0
            # TODO: are logps and sumlogps always replaced at every value?
            for label in range(K):
                pi_l = tt.sum(tt.eq(aux3, (ones * label).reshape((n_samples, 1))), axis=1) / 8.0
                sumlogps = tt.set_subtensor(
                    sumlogps[label, :],
                    (mus[label].logp(value)) + (pi_l - 1) * spatial_factor)
            sumlogps = tt.sum(sumlogps, axis=0)

            for label in range(K):
                indx = tt.eq(category, tt.as_tensor_variable(label)).nonzero()
                logps = tt.set_subtensor(
                    logps[indx],
                    (mus[label].logp(value)[indx])
                    + (pi[indx] - 1) * spatial_factor
                    - sumlogps[indx])
            return logps

        return logp_

    # Unpacking keeps the requirement that `data` is 2-D; the sample count
    # itself comes from the grid size.
    _, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)

    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    # -69 is a filler that never equals a valid label.
    aux = tt.ones(n_samples * max_neigs) * -69

    shp = (K, n_feats)
    alpha = 0.1 * np.ones((n_samples, K))

    with pm.Model() as model:
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i, n=n_feats, eta=2.,
                              sd_dist=pm.HalfCauchy.dist(2.5))
            for i in range(K)
        ]
        L = [pm.expand_packed_triangular(n_feats, packed_L[i]) for i in range(K)]

        component_means = pm.Normal('mus', mu=100., sd=1, shape=shp)
        pi = Dirichlet('pi', a=alpha, shape=(n_samples, K))
        category = pm.Categorical('category', p=pi, shape=n_samples)

        # Traced extrema of the label vector.
        pm.Deterministic('shit_max', tt.max(category))
        pm.Deterministic('shit_min', tt.min(category))

        mvs = [pm.MvNormal.dist(mu=component_means[i], chol=L[i]) for i in range(K)]

        # Scatter each sample's neighbour labels into the filler vector.
        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        xs = DensityDist(
            'x',
            logp_simple(mvs, category, aux2.reshape((n_samples, max_neigs))),
            observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step=step2, tune=1000, chains=4)

    # Only 'mus' is plotted: this model registers no variable named 'sds',
    # so asking for it would fail on lookup.
    pm.traceplot(trace, varnames=['mus'])
    plt.title('logp_sum_mo_alpha_700_tunes_spatial_2')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    return model, mod, trace
def run_normal_mv_model_prior(data, K=3, mus=None, mc_samples=10000, jobs=1, n_cols=10, n_rows=100, neigs=1):
    """
    Sample the prior structure of the spatial mixture: component means, a
    shared LKJ Cholesky covariance, mixture weights, labels, and a traced
    ``prior`` giving the share of neighbour slots that match each label.

    No observed variable is attached (the likelihood line is commented out
    in the original), so ``data`` contributes only its shape.

    Arguments:
    data:        2-D array; only ``data.shape[1]`` (features) is used.
    K:           number of mixture components.
    mus:         accepted for interface compatibility; not used (the prior
                 mean of the component means is hard-coded).
    mc_samples:  number of draws passed to ``sample``.
    jobs:        forwarded to ``sample`` as ``n_jobs``.
    n_cols, n_rows:  grid size; the number of samples is ``n_cols * n_rows``.
    neigs:       neighbourhood radius passed to ``indxs_neigs``; the number
                 of neighbour slots per sample is ``4 * neigs * (neigs + 1)``.

    Returns:
    ``(model, mod, trace)`` where ``mod`` is ``stats.mode`` of the sampled
    ``category`` values from index ``int(mc_samples * 0.75)`` onwards.

    Side effects: draws a trace plot and sets the current figure title.
    """
    # Unpacking keeps the requirement that `data` is 2-D; the sample count
    # itself comes from the grid size.
    _, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)

    neighbour_idx = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    has_neighbour = neighbour_idx != -1
    inds = np.where(has_neighbour)[0]
    neighbour_idx = neighbour_idx[has_neighbour]
    # -69 is a filler that never equals a valid label.
    filler = tt.ones(n_samples * max_neigs) * -69

    with pm.Model() as model:
        packed_chol = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                        sd_dist=pm.HalfCauchy.dist(2.5))
        chol = pm.expand_packed_triangular(n_feats, packed_chol)
        pm.Deterministic('Sigma', chol.dot(chol.T))

        # NOTE(review): the prior mean is a fixed 5x2 table while the shape
        # is (K, n_feats) - confirm the intended K and feature count.
        pm.Normal('mus',
                  mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]],
                  sd=10,
                  shape=(K, n_feats))
        weights = Dirichlet('pi', a=pm.floatX([1.] * K), shape=K)
        # TODO one pi per voxel
        category = pm.Categorical('category', p=weights, shape=n_samples)

        # Scatter each sample's neighbour labels into the filler vector and
        # count, per sample, how many slots equal the sample's own label.
        neighbour_labels = tt.set_subtensor(filler[inds], category[neighbour_idx])
        matches = tt.eq(neighbour_labels.reshape((n_samples, max_neigs)),
                        category.reshape((n_samples, 1)))
        # NOTE(review): the divisor 8.0 equals max_neigs only when
        # neigs == 1 - confirm before using other radii.
        pm.Deterministic('prior', (tt.sum(matches, axis=1) + 0.0) / 8.0)

        label_step = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step=[label_step], n_jobs=jobs, tune=600)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model 40 cols')

    # Summarise the labels using the last quarter of the draws only.
    tail_start = int(mc_samples * 0.75)
    mod = stats.mode(trace['category'][tail_start:])
    return model, mod, trace
category_print = tt.printing.Print('category')(category) #mvs = [MvNormal('mu_%d' % i, mu=mus[i],tau=pm.floatX(1. * np.eye(n_feats)),shape=(n_feats,)) for i in range(K)] #mvs = [pm.MvNormal.dist(mu = mus[i], chol = L[i]) for i in range(K)] mvs = [ pm.MvNormal.dist(mu=mus[i], tau=np.eye(n_feats, dtype=np.float)) for i in range(K) ] aux2 = tt.set_subtensor(aux[inds], category[to_fill]) xs = DensityDist('x', logp_simple(mvs, category, aux2.reshape((n_samples, max_neigs))), observed=data) step2 = pm.ElemwiseCategorical(vars=[category], values=range(K)) mystep = pm.Metropolis(vars=[mus, sds]) trace = sample(10000, start=pm.find_MAP(), step=[mystep, step2], tune=3000, chains=1, discard_tuned_samples=True, exception_verbosity='high') pm.traceplot(trace, varnames=['mus', 'sds']) #plt.title('logp_sum_mo_alpha_700_tunes_spatial_2map') m = stats.mode(trace['category'][int(1000 * 0.75):]) #for RV in model.basic_RVs: