def inference(self, iter_=5000, burn=1000):
    # Per-document topic distributions theta_d ~ Dirichlet(alpha).
    theta = pm.Container([
        pm.CompletedDirichlet(
            "theta_%s" % d,
            pm.Dirichlet("ptheta_%s" % d, theta=self.alpha))
        for d in range(self.D)
    ])
    # Per-topic word distributions phi_k ~ Dirichlet(beta).
    phi = pm.Container([
        pm.CompletedDirichlet(
            "phi_%s" % k,
            pm.Dirichlet("pphi_%s" % k, theta=self.beta))
        for k in range(self.K)
    ])
    # Topic assignment for every word slot of every document.
    z_d = pm.Container([
        pm.Categorical("z_%s" % d,
                       p=theta[d],
                       value=np.random.randint(self.K, size=len(self.bw[d])),
                       size=len(self.bw[d]))
        for d in range(self.D)
    ])
    # Observed words. The word distribution must follow z_d through a
    # deterministic node; indexing phi with z_d[d][w].get_value() would
    # freeze each topic assignment at its initial value.
    w_z = pm.Container([
        pm.Categorical("w_%s_%s" % (d, w),
                       p=pm.Lambda("phi_z_%s_%s" % (d, w),
                                   lambda z=z_d[d][w], phi=phi: phi[z]),
                       value=self.bw[d][w],
                       observed=True)
        for d in range(self.D) for w in range(len(self.bw[d]))
    ])
    model = pm.Model([theta, phi, z_d, w_z])
    self.mcmc = pm.MCMC(model)
    self.mcmc.sample(iter=iter_, burn=burn)
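# A minimal usage sketch for inference() above (PyMC2; assumes
# `import numpy as np` and `import pymc as pm`). The host class and its
# attribute names, beyond those inference() actually uses, are hypothetical.
class LDA:
    def __init__(self, bw, K, V):
        self.bw = bw              # documents as lists of word ids
        self.D = len(bw)          # number of documents
        self.K = K                # number of topics
        self.alpha = np.ones(K)   # symmetric prior over topics
        self.beta = np.ones(V)    # symmetric prior over words
    inference = inference         # reuse the method defined above

docs = [[0, 1, 2, 1], [3, 4, 3], [0, 4, 2, 2]]
lda = LDA(docs, K=2, V=5)
lda.inference(iter_=500, burn=100)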
def get_z_data(self, p, p_pos, q):
    K = 2  # Number of topics
    M = p  # Number of documents
    N = q  # Total number of unique words across all documents
    alpha = 1.0  # Concentration parameter for the per-document
                 # distributions over topics (one for each document)
    beta = 1.0   # Concentration parameter for the per-topic
                 # distributions over words (one for each topic)
    phi = pymc.Container([
        pymc.CompletedDirichlet(
            name="phi_" + str(k),
            D=pymc.Dirichlet(name="phi_temp_" + str(k),
                             theta=beta * numpy.ones(N)),
        ) for k in range(K)
    ])
    theta = pymc.Container([
        pymc.CompletedDirichlet(
            name="theta_" + str(m),
            D=pymc.Dirichlet(name="theta_temp_" + str(m),
                             theta=alpha * numpy.ones(K)),
        ) for m in range(M)
    ])
    z = pymc.Container([
        pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
        for m in range(M)
    ])
    w = pymc.Container([
        pymc.Categorical(
            name="w_" + str(m) + "_" + str(n),
            p=pymc.Lambda(
                "phi_z_" + str(m) + str(n),
                lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
            ),
        ) for m in range(M) for n in range(N)
    ])
    lda = pymc.Model([w, z, theta, phi])
    z_rvs = []
    for m in range(M):
        metadata = {"doc_idx": m, "num_unique_words": N}
        # Note: "w_0_0" is just a dummy argument that must be present
        # in the pymc.Model.
        rv = WordCountVecRV(model=lda, name="w_0_0", metadata=metadata)
        z_rvs += [rv]
    return z_rvs
def __init__(self, corpus, K=10, iterations=1000, burn=100):
    print("Building model ...")
    self.K = K
    self.V = corpus.wordCount + 1
    self.M = corpus.documentCount
    self.alpha = np.ones(self.K)
    self.beta = np.ones(self.V)
    self.corpus = corpus
    self.observations = np.array(corpus.observations)

    # Per-topic word distributions.
    self.phi = np.empty(self.K, dtype=object)
    for i in range(self.K):
        self.phi[i] = pm.CompletedDirichlet(
            "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
    self.phi = pm.Container(self.phi)

    # Per-document topic distributions.
    self.theta = np.empty(self.M, dtype=object)
    for i in range(self.M):
        self.theta[i] = pm.CompletedDirichlet(
            "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i, theta=self.alpha))
    self.theta = pm.Container(self.theta)

    # Topic assignment for each observed word.
    self.z = np.empty(self.observations.shape, dtype=object)
    for i in range(self.M):
        self.z[i] = pm.Categorical(
            "z[%i]" % i,
            size=len(self.observations[i]),
            p=self.theta[i],
            value=np.random.randint(self.K, size=len(self.observations[i])))
    self.z = pm.Container(self.z)

    # Observed words, linked to phi through the sampled topic assignments.
    self.w = []
    for i in range(self.M):
        self.w.append([])
        for j in range(len(self.observations[i])):
            self.w[i].append(
                pm.Categorical(
                    "w[%i][%i]" % (i, j),
                    p=pm.Lambda("phi[z[%i][%i]]" % (i, j),
                                lambda z=self.z[i][j], phi=self.phi: phi[z]),
                    value=self.observations[i][j],
                    observed=True))
    self.w = pm.Container(self.w)

    self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))
    print("Fitting model ...")
    self.mcmc.sample(iter=iterations, burn=burn)
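# Usage sketch for the constructor above, with a minimal stand-in corpus
# (the real Corpus class and the enclosing model class are not shown, so
# both names here are hypothetical; assumes `import numpy as np` and
# `import pymc as pm`, PyMC2-era).
class ToyCorpus:
    observations = [[0, 1, 2, 2], [3, 1, 0], [2, 2, 3]]  # ragged word-id lists
    documentCount = 3
    wordCount = 3  # highest word id; the model uses V = wordCount + 1

class LDAModel:
    __init__ = __init__  # reuse the constructor defined above

lda = LDAModel(ToyCorpus(), K=2, iterations=200, burn=50)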
def test_multinomial_check_parameters():
    x = np.array([1, 5])
    n = x.sum()

    with pm.Model() as modelA:
        p_a = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialA("x", n, p_a, observed=x)

    with pm.Model() as modelB:
        p_b = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialB("x", n, p_b, observed=x)

    assert np.isclose(modelA.logp({"p_simplex__": [0]}),
                      modelB.logp({"p_simplex__": [0]}))
def run_Categorical_Normal():
    nC = 3  # Num. Clusters
    aD = [0, 1, 8, 9, 20, 21]  # Data Points
    nPts = len(aD) + 1

    #Clusters
    aUh = [pm.Uniform('UnifH' + str(i), lower=-50, upper=50)
           for i in range(nC)]  # @UndefinedVariable
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1)
           for i in range(nC)]  # @UndefinedVariable

    #Dirichlet & Categorical Nodes
    Dir = pm.Dirichlet('Dirichlet', theta=[1] * nC)  # @UndefinedVariable
    aC = [pm.Categorical('Cat' + str(i), Dir) for i in range(nPts)]  # @UndefinedVariable
    aL = [pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
          for i in range(nPts)]  # @UndefinedVariable

    #Points
    aN = [pm.Normal('NormX' + str(i), mu=aL[i], tau=1,
                    observed=True, value=aD[i])
          for i in range(nPts - 1)]  # @UndefinedVariable
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)  # @UndefinedVariable
    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
def getModel():
    nA, nK = 0.05, 4
    aDir = [nA / nK] * nK
    D = pm.Dirichlet('1-Dirichlet', theta=aDir)  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    # C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    # C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    # C4 = pm.Categorical('14-Cat', D)  #@UndefinedVariable
    # C5 = pm.Categorical('15-Cat', D)  #@UndefinedVariable
    # G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)  #@UndefinedVariable
    N0_1 = pm.Normal('5-Norm0_1', mu=10, tau=1)  #@UndefinedVariable
    N0_2 = pm.Normal('6-Norm0_2', mu=-10, tau=1)  #@UndefinedVariable
    N0_3 = pm.Normal('7-Norm0_3', mu=30, tau=1)  #@UndefinedVariable
    N0_4 = pm.Normal('16-Norm0_3', mu=-30, tau=1)  #@UndefinedVariable
    # Pass the component means as parents so they update during sampling;
    # capturing N0_x.value here would freeze the initial draws.
    aMu = [N0_1, N0_2, N0_3, N0_4]
    p_N1 = pm.Lambda('p_Norm1', lambda n=C1, mus=aMu: mus[int(n)],
                     doc='Pr[Norm|Cat]')
    # p_N2 = pm.Lambda('p_Norm2', lambda n=C2: aMu[n], doc='Pr[Norm|Cat]')
    # p_N3 = pm.Lambda('p_Norm3', lambda n=C3: aMu[n], doc='Pr[Norm|Cat]')
    # p_N4 = pm.Lambda('p_Norm4', lambda n=C4: aMu[n], doc='Pr[Norm|Cat]')
    # p_N5 = pm.Lambda('p_Norm6', lambda n=C5: aMu[n], doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)  #@UndefinedVariable
    # obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=40)  #@UndefinedVariable @UnusedVariable
    # obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=40)  #@UndefinedVariable @UnusedVariable
    # obsN3 = pm.Normal('12-Norm', mu=p_N4, tau=1, observed=True, value=-40)  #@UndefinedVariable @UnusedVariable
    # obsN4 = pm.Normal('13-Norm', mu=p_N5, tau=1, observed=True, value=-40)  #@UndefinedVariable @UnusedVariable
    return pm.Model([D, C1, N, N0_1, N0_2, N0_3, N0_4])
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    np.random.seed(random_seed)
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        # Break label-switching symmetry by forcing mu[0] <= mu[1] <= mu[2].
        enforce_order = pm.Potential(
            "enforce_order",
            at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
            + at.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified; this is a hack to make it work.
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }
    return model, start
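# Minimal usage sketch for mixture_model() (hedged: assumes a PyMC version
# whose pm.sample accepts `initvals`; older PyMC3 takes `start=` instead).
model, start = mixture_model()
with model:
    idata = pm.sample(draws=500, tune=500, initvals=start, chains=2)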
def initialize_variables(self):
    """Initializes MCMC variables."""
    # This has size (n-1), so it is missing the final component.
    self.dirichlet = pymc.Dirichlet("dirichlet", self.prior_pops)
    # This RV fills in the missing value of the population vector, but
    # has shape (1, n) rather than (n).
    self.matrix_populations = pymc.CompletedDirichlet(
        "matrix_populations", self.dirichlet)
    # Finally, we get a flat array of the populations.
    self.populations = pymc.CommonDeterministics.Index(
        "populations", self.matrix_populations, 0)
    self.dirichlet.keep_trace = False

    @pymc.dtrm
    def mu(populations=self.populations):
        return populations.dot(self.predictions)
    self.mu = mu

    @pymc.potential
    def logp(populations=self.populations, mu=self.mu):
        return -0.5 * get_chi2(populations, self.predictions,
                               self.measurements, self.uncertainties, mu=mu)
    self.logp = logp
def run_HDP():
    nG, nA, nC = 2, 2, 2  # Gamma, Alpha & Max No. Clusters
    aDir = [nG / nC] * nC
    Dir0 = pm.Dirichlet('Dirichlet0', theta=aDir)  # @UndefinedVariable
    lDir0 = pm.Lambda('p_Dir0',
                      lambda d=Dir0: np.concatenate([d, [1 - sum(d)]]) * nA)  # @UndefinedVariable
    aNodes1 = get_DP('1', lDir0, [0, 1, 20, 21])
    aNodes2 = get_DP('2', lDir0, [50, 51, 70, 71, 72])
    return np.concatenate([[Dir0], aNodes1, aNodes2])
def test_sample_prior_and_posterior(self):
    def build_toy_dataset(N, K):
        pi = np.array([0.2, 0.5, 0.3])
        mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
        stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
        x = np.zeros((N, 3), dtype=np.float32)
        y = np.zeros((N,), dtype=int)  # np.int is deprecated; use int
        for n in range(N):
            k = np.argmax(np.random.multinomial(1, pi))
            x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
            y[n] = k
        return x, y

    N = 100  # number of data points
    K = 3    # number of mixture components
    D = 3    # dimensionality of the data
    X, y = build_toy_dataset(N, K)

    with pm.Model() as model:
        pi = pm.Dirichlet("pi", np.ones(K), shape=(K,))
        comp_dist = []
        mu = []
        packed_chol = []
        chol = []
        for i in range(K):
            mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
            packed_chol.append(
                pm.LKJCholeskyCov("chol_cov_%i" % i, eta=2, n=D,
                                  sd_dist=pm.HalfNormal.dist(2.5)))
            chol.append(pm.expand_packed_triangular(D, packed_chol[i],
                                                    lower=True))
            comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))
        pm.Mixture("x_obs", pi, comp_dist, observed=X)

    with model:
        idata = pm.sample(30, tune=10, chains=1)

    n_samples = 20
    with model:
        ppc = pm.sample_posterior_predictive(idata, n_samples)
        prior = pm.sample_prior_predictive(samples=n_samples)

    assert ppc["x_obs"].shape == (n_samples,) + X.shape
    assert prior["x_obs"].shape == (n_samples,) + X.shape
    assert prior["mu0"].shape == (n_samples, D)
    assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
def cartesian_categorical_child(name, parents, levels, value=None, N=None,
                                return_coeffs=False, fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)

    # One Dirichlet coefficient vector per combination of parent levels.
    ranges = [range(get_levels_count(p)) for p in parents]
    parents2index = {}
    coeffs = []
    for i, parent_vals in enumerate(product(*ranges)):
        parents2index[parent_vals] = i
        parents_repr = ' '.join('%s=%s' % (parent, v)
                                for parent, v in zip(parents, parent_vals))
        coeff_name = COEFFS_PREFIX + 'p(%s | %s)' % (name, parents_repr)
        coeff = fixed.get(coeff_name,
                          pymc.Dirichlet(coeff_name, theta=[1] * levels))
        coeffs.append(coeff)

    intify = lambda x: tuple(map(int, x))

    @pymc.deterministic
    def child_prob(parents=parents, coeffs=coeffs):
        probs = np.array([
            coeffs[parents2index[intify(parent_vals)]]
            for parent_vals in zip(*parents)
        ])
        # pymc's Dirichlet stores only k-1 components; restore the last
        # one so every row is a complete probability vector.
        remainders = 1 - probs.sum(axis=1)
        remainders = remainders.reshape((len(remainders), 1))
        return np.hstack([probs, remainders])
    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=child_prob, value=value,
                                 observed=True)
    set_levels_count(child, levels)

    if return_coeffs:
        return child, coeffs + [child_prob]
    else:
        return child
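# Usage sketch (assumes make_categorical, get_levels_count, and friends
# from the same module, as defined elsewhere in this file): a three-level
# child whose distribution depends on two binary parents.
a = make_categorical('a', levels=2, N=100)
b = make_categorical('b', levels=2, N=100)
c = cartesian_categorical_child('c', parents=[a, b], levels=3, N=100)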
def run_HDP():
    nC = 3  # Max No. Clusters
    Gam = pm.Uniform('Gamma0', lower=0, upper=15)  # @UndefinedVariable
    aDir = [Gam / nC] * nC
    Dir0 = pm.Dirichlet('Dirichlet0', theta=aDir)  # @UndefinedVariable
    lDir0 = pm.Lambda('p_Dir0',
                      lambda d=Dir0: np.concatenate([d, [1 - sum(d)]]))  # @UndefinedVariable
    aNodes1 = get_DP('1', lDir0, [0, 1, 20, 21])
    aNodes2 = get_DP('2', lDir0, [50, 51, 70, 71, 72])
    return np.concatenate([[Dir0], aNodes1, aNodes2])
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[2, 1, 3, 1])  #@UndefinedVariable
    # p_B = pm.Lambda('p_Bern', lambda b=B: np.where(b==0, 0.9, 0.1), doc='Pr[Bern|Beta]')
    C = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)  #@UndefinedVariable
    p_N = pm.Lambda('p_Norm',
                    lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                                          [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N, tau=1)  #@UndefinedVariable
    # N = pm.Normal('2-Norm', mu=p_N, tau=1, observed=True, value=2.5)  #@UndefinedVariable
    return pm.Model([D, C, N])
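# Usage sketch for getModel() above (PyMC2; assumes `import pymc as pm`):
# run Metropolis sampling and inspect draws of the categorical indicator.
mcmc = pm.MCMC(getModel())
mcmc.sample(iter=5000, burn=1000)
print(mcmc.trace('2-Cat')[:10])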
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[2, 1, 2, 4])  #@UndefinedVariable
    # p_B = pm.Lambda('p_Bern', lambda b=B: np.where(b==0, 0.9, 0.1), doc='Pr[Bern|Beta]')
    C = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)  #@UndefinedVariable
    p_N = pm.Lambda(
        'p_Norm',
        lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                              [[-5, -5], [0, 0], [5, 5], [10, 10]]),
        doc='Pr[Norm|Cat]')
    N = pm.MvNormal('3-Norm_2D', mu=p_N, tau=np.eye(2, 2))  #@UndefinedVariable
    # N = pm.MvNormal('2-Norm_2D', mu=p_N, tau=np.eye(2,2), observed=True, value=[2.5,2.5])  #@UndefinedVariable
    return pm.Model([D, C, N])
def gmm_model(data, K, mu_0=0.0, alpha_0=0.1, beta_0=0.1, alpha=1.0):
    """
    K: number of components
    n_samples: number of samples
    n_features: number of features
    mu_0: prior mean of mu_k
    alpha_0: alpha of the inverse-gamma prior on tau_k
    beta_0: beta of the inverse-gamma prior on tau_k
    alpha: concentration of the Dirichlet prior phi_0

    latent variables:
        phi_0: shape (K-1,), Dirichlet distribution
        phi: shape (K,), phi_0 with the K-th component restored
        z: shape (n_samples,), categorical component indicators
        mu_k: shape (K, n_features), normal; mu_k[k] is the mean of the
            k-th component
        tau_k: shape (K, n_features), inverse-gamma; tau_k[k] is used as
            the precision of the k-th component
    """
    n_samples, n_features = data.shape

    # latent variables
    tau_k = pm.InverseGamma('tau_k',
                            alpha_0 * np.ones((K, n_features)),
                            beta_0 * np.ones((K, n_features)),
                            value=beta_0 * np.ones((K, n_features)))
    mu_k = pm.Normal('mu_k',
                     np.ones((K, n_features)) * mu_0,
                     tau_k,
                     value=np.ones((K, n_features)) * mu_0)
    phi_0 = pm.Dirichlet('phi_0', theta=np.ones(K) * alpha)

    @pm.deterministic(dtype=float)
    def phi(value=np.ones(K) / K, phi_0=phi_0):
        # Restore the K-th component dropped by pymc's Dirichlet.
        return np.hstack((phi_0, (1 - np.sum(phi_0))))

    z = pm.Categorical('z', p=phi,
                       value=pm.rcategorical(np.ones(K) / K, size=n_samples))

    # observed variables
    x = pm.Normal('x', mu=mu_k[z], tau=tau_k[z], value=data, observed=True)

    return pm.Model([mu_k, tau_k, phi_0, phi, z, x])
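# Usage sketch (PyMC2; assumes `import numpy as np` and `import pymc as pm`):
# fit a two-component mixture to synthetic 1-D data.
data = np.vstack([np.random.normal(0, 1, (50, 1)),
                  np.random.normal(5, 1, (50, 1))])
mcmc = pm.MCMC(gmm_model(data, K=2))
mcmc.sample(iter=2000, burn=500)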
def test_multivariate2(self):
    # Added test for issue #3271
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6))
        obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(
            20, tune=10, cores=1,
            return_inferencedata=False,
            compute_convergence_checks=False,
        )
    sim_priors = pm.sample_prior_predictive(
        return_inferencedata=False, samples=20, model=dm_model)
    sim_ppc = pm.sample_posterior_predictive(
        burned_trace, return_inferencedata=False, samples=20, model=dm_model)
    assert sim_priors["probs"].shape == (20, 6)
    assert sim_priors["obs"].shape == (20,) + mn_data.shape
    assert sim_ppc["obs"].shape == (20,) + mn_data.shape
def create_mk_model(tree, chars, Qtype, pi):
    """
    Create model objects to be passed to pymc.MCMC

    Creates Qparams and likelihood function
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    if Qtype == "ER":
        N = 1
    elif Qtype == "Sym":
        N = int(binom(nchar, 2))
    elif Qtype == "ARD":
        N = int(nchar ** 2 - nchar)
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # Setting a Dirichlet prior with a Jeffreys hyperprior of 1/2
    if N != 1:
        theta = [1.0 / 2.0] * N
        Qparams_init = pymc.Dirichlet("Qparams_init", theta, value=[0.5])
        Qparams_init_full = pymc.CompletedDirichlet("Qparams_init_full",
                                                    Qparams_init)
    else:
        Qparams_init_full = [[1.0]]

    # Exponential scaling factor for Qparams
    scaling_factor = pymc.Exponential(name="scaling_factor", beta=1.0,
                                      value=1.0)

    # Scaled Qparams; we would not expect them to necessarily sum to 1
    # as would be the case in a Dirichlet distribution
    @pymc.deterministic(plot=False)
    def Qparams(q=Qparams_init_full, s=scaling_factor):
        Qs = np.empty(N)
        for i in range(N):
            Qs[i] = q[0][i] * s
        return Qs

    l = mk.create_likelihood_function_mk(tree=tree, chars=chars,
                                         Qtype=Qtype, pi="Equal",
                                         findmin=False)

    @pymc.potential
    def mklik(q=Qparams, name="mklik"):
        return l(q)

    return locals()
def make_categorical(name, levels, value=None, N=None, return_coeffs=False,
                     fixed={}):
    """
    Creates a Categorical random variable with a Dirichlet parent.

    :param name: name of the variable
    :param levels: integer - how many levels the variable has
    :param value: optional - list of observed values of the variable. Must
        consist of integers from 0 to levels - 1. May be a masked array if
        the variable has missing values.
    :param N: size of the variable (number of values). Either N or value
        must be specified.
    :param return_coeffs: if True, returns the Dirichlet parent as well as
        the categorical child. False by default.
    :param fixed: optional dictionary of coefficient values to be fixed.
    :return: Categorical pymc random variable, or (if return_coeffs == True)
        a tuple (categorical variable; a list with a single element - the
        Dirichlet parent)
    """
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)
        N = N or len(value)

    coeff_name = COEFFS_PREFIX + 'p(%s)' % name
    if coeff_name in fixed:
        probs = fixed[coeff_name]
        parent = list(probs) + [1 - sum(probs)]
    else:
        parent = pymc.Dirichlet(coeff_name, theta=[1] * levels)

    if value is None:
        child = pymc.Categorical(name, p=parent, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=parent, observed=True, value=value)
    set_levels_count(child, levels)

    if return_coeffs:
        return child, [parent]
    else:
        return child
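# Usage sketch (assumes `import numpy as np`, `import pymc`, and the
# module helpers referenced above): an observed three-level variable,
# returning the Categorical child and its Dirichlet parent.
obs = np.array([0, 1, 2, 1, 1, 0])
color, coeffs = make_categorical('color', levels=3, value=obs,
                                 return_coeffs=True)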
def get_DP(sDP, lDir0, aD):
    nPts = len(aD) + 1
    nC = len(lDir0.value)
    nMinD, nMaxD = min(aD), max(aD)

    #Clusters
    aUh = [pm.Uniform('UnifH' + str(i) + '_' + sDP,
                      lower=nMinD - 20, upper=nMaxD + 20)
           for i in range(nC)]  # @UndefinedVariable
    aNc = [pm.Normal('NormC' + str(i) + '_' + sDP, mu=aUh[i], tau=1)
           for i in range(nC)]  # @UndefinedVariable

    #Dirichlet & Categorical Nodes
    Gam = pm.Uniform('Gamma1_' + sDP, lower=0, upper=15)  # @UndefinedVariable
    Dir = pm.Dirichlet('Dirichlet1_' + sDP, theta=lDir0 * Gam)  # @UndefinedVariable
    aC = [pm.Categorical('Cat' + str(i) + '_' + sDP, Dir)
          for i in range(nPts)]  # @UndefinedVariable
    aL = [pm.Lambda('p_Norm' + str(i) + '_' + sDP,
                    lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
          for i in range(nPts)]  # @UndefinedVariable

    #Points
    aN = [pm.Normal('NormX' + str(i) + '_' + sDP, mu=aL[i], tau=1,
                    observed=True, value=aD[i])
          for i in range(nPts - 1)]  # @UndefinedVariable
    Nz = pm.Normal('NormZ_' + sDP, mu=aL[-1], tau=1)  # @UndefinedVariable
    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    W0_0 = pm.WishartCov('4-Wishart0_1', n=5, C=np.eye(2))  #@UndefinedVariable
    N0_1 = pm.MvNormalCov('5-Norm0_1', mu=[-20, -20], C=np.eye(2))  #@UndefinedVariable
    N0_2 = pm.MvNormalCov('6-Norm0_2', mu=[0, 0], C=np.eye(2))  #@UndefinedVariable
    N0_3 = pm.MvNormalCov('7-Norm0_3', mu=[20, 20], C=np.eye(2))  #@UndefinedVariable
    # Pass the component means as parents so they update during sampling;
    # capturing N0_x.value here would freeze the initial draws.
    aMu = [N0_1, N0_2, N0_3]
    fL1 = lambda n=C1, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    fL2 = lambda n=C2, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    fL3 = lambda n=C3, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm', mu=p_N1, C=W0_0)  #@UndefinedVariable
    obsN1 = pm.MvNormalCov('8-Norm', mu=p_N2, C=W0_0,
                           observed=True, value=[-20, -20])  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.MvNormalCov('9-Norm', mu=p_N3, C=W0_0,
                           observed=True, value=[20, 20])  #@UndefinedVariable @UnusedVariable
    return pm.Model([D, C1, C2, C3, N, W0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
def get_Models():
    #Full Model (Dirichlet & Categorical)
    aAlphas = [1, 2, 8, 2]
    aD = [0, 3, 1]
    Dir = pm.Dirichlet('Dir', theta=aAlphas)  # @UndefinedVariable
    CatD = [pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
            for i in range(len(aD))]  # @UndefinedVariable @UnusedVariable
    CatQ = pm.Categorical('CatQ', p=Dir)  # @UndefinedVariable

    #Collapsed Model (Categorical)
    aP = []
    for i in range(len(aAlphas)):
        #For each category, get its probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)  # @UndefinedVariable
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
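# The collapsed model above uses the standard Dirichlet-categorical
# posterior predictive p(x = i | data) = (alpha_i + n_i) / (sum_j alpha_j + N),
# where n_i counts observations of category i. A quick sanity check for
# the numbers used in get_Models():
aAlphas, aD = [1, 2, 8, 2], [0, 3, 1]
aP = [(a + aD.count(i)) / (sum(aAlphas) + len(aD))
      for i, a in enumerate(aAlphas)]
assert abs(sum(aP) - 1.0) < 1e-12  # a proper probability vector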
def sample_prior_pops(num_frames, bootstrap_index_list):
    """Sample the prior populations using a Dirichlet random variable.

    Parameters
    ----------
    num_frames : int
        Number of conformations
    bootstrap_index_list : list([ndarray])
        List of arrays of frame indices. The indices in
        bootstrap_index_list[i] will be perturbed together.

    Returns
    -------
    prior_populations : ndarray, shape = (num_frames)
        Prior populations of each conformation

    Notes
    -----
    This function allows you to perform Bayesian bootstrapping by modifying
    the prior populations attached to each frame. Because molecular dynamics
    frames are time-correlated, one must first divide the dataset into
    temporal blocks. A Dirichlet random variable is then drawn to modify the
    prior populations blockwise.
    """
    num_blocks = len(bootstrap_index_list)

    # Draw a Dirichlet over the blocks.
    prior_dirichlet = pymc.Dirichlet("prior_dirichlet", np.ones(num_blocks))
    block_pops = np.zeros(num_blocks)
    # The pymc Dirichlet does not explicitly store the final component.
    block_pops[:-1] = prior_dirichlet.value[:]
    # Calculate the final component from normalization.
    block_pops[-1] = 1.0 - block_pops.sum()

    prior_populations = np.ones(num_frames)
    for k, ind in enumerate(bootstrap_index_list):
        prior_populations[ind] = block_pops[k] / len(ind)
    return prior_populations
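# Usage sketch (assumes `import numpy as np` and `import pymc`): two
# temporal blocks over six frames; the result is a normalized weight
# vector over frames.
bootstrap_index_list = [np.array([0, 1, 2]), np.array([3, 4, 5])]
prior_pops = sample_prior_pops(6, bootstrap_index_list)
print(prior_pops.sum())  # ~1.0 by construction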
def run_DP():
    aD = [-10, -9, 10, 11, 20, 21, 42, 43]  # Data Points
    # nA, nC = 3, 3  # Alpha & Max No. Clusters
    nC = 5
    nPts = len(aD) + 1

    #Clusters
    aUh = [pm.Uniform('UnifH' + str(i), lower=-50, upper=50)
           for i in range(nC)]  # @UndefinedVariable
    # Uh = pm.Uniform('UnifH', lower=-50, upper=60)  # @UndefinedVariable
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1)
           for i in range(nC)]  # @UndefinedVariable

    #Dirichlet & Categorical Nodes
    Gam = pm.Uniform('UnifG', lower=0, upper=15)  # @UndefinedVariable
    # Gam = pm.Gamma('Gamma', alpha=2.5, beta=2)  # @UndefinedVariable
    Dir = pm.Dirichlet('Dirichlet', theta=[Gam / nC] * nC)  # @UndefinedVariable
    aC = [pm.Categorical('Cat' + str(i), Dir) for i in range(nPts)]  # @UndefinedVariable
    aL = [pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
          for i in range(nPts)]  # @UndefinedVariable

    #Points
    aN = [pm.Normal('NormX' + str(i), mu=aL[i], tau=1,
                    observed=True, value=aD[i])
          for i in range(nPts - 1)]  # @UndefinedVariable
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)  # @UndefinedVariable
    return np.concatenate([[Nz, Dir, Gam], aUh, aNc, aC, aN])
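# Usage sketch (PyMC2; assumes `import pymc as pm`): sample the DP
# mixture; 'NormZ' is the predictive draw for a new, unobserved point.
nodes = run_DP()
mcmc = pm.MCMC(list(nodes))
mcmc.sample(iter=5000, burn=1000)
print(mcmc.trace('NormZ')[:].mean())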
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)  #@UndefinedVariable
    U1 = pm.Uniform('12-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U2 = pm.Uniform('13-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U3 = pm.Uniform('14-Unif', lower=-100, upper=500)  #@UndefinedVariable
    N0_1 = pm.Normal('5-Norm0_1', mu=U1, tau=1)  #@UndefinedVariable
    N0_2 = pm.Normal('6-Norm0_2', mu=U2, tau=1)  #@UndefinedVariable
    N0_3 = pm.Normal('7-Norm0_3', mu=U3, tau=1)  #@UndefinedVariable
    # Pass the component means as parents so they update during sampling;
    # capturing N0_x.value here would freeze the initial draws.
    aMu = [N0_1, N0_2, N0_3]
    fL1 = lambda n=C1, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    fL2 = lambda n=C2, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    fL3 = lambda n=C3, mus=aMu: np.select([n == 0, n == 1, n == 2], mus)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)  #@UndefinedVariable
    obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1,
                      observed=True, value=0)  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1,
                      observed=True, value=150)  #@UndefinedVariable @UnusedVariable
    return pm.Model([D, C1, C2, C3, N, G0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
def get_Models():
    #Full Model (DP [Dirichlet] & Categorical)
    aD = [1, 0, 1]  # Data Points
    nA, nC = 0.3, 3  # Alpha & Max No. Clusters
    aAlphas = [nA / nC] * nC
    Dir = pm.Dirichlet('Dir', theta=aAlphas)  # @UndefinedVariable
    CatD = [pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
            for i in range(len(aD))]  # @UndefinedVariable @UnusedVariable
    CatQ = pm.Categorical('CatQ', p=Dir)  # @UndefinedVariable

    #Collapsed Model (Categorical)
    aP = []
    for i in range(len(aAlphas)):
        #For each category, get its probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)  # @UndefinedVariable
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
# Assumes PyMC3 (`import pymc3 as pm`), `import theano.tensor as T`, and
# `day`/`mtag` defined earlier; the RVs must live inside a model context,
# and every observed node needs a unique name.
with pm.Model() as per_model:
    state = pm.Bernoulli('state', p=0.5, shape=len(day))

    # Parameters
    alpha = pm.Normal('alpha', mu=0, tau=1E-3, shape=len(mtag[0]))
    beta = pm.Normal('beta', mu=0, tau=1E-3, shape=len(mtag[0]))

    ## Softmax
    def invlogit(x):
        return T.nnet.softmax(x)

    theta = np.empty(len(day), object)
    p_vec = np.empty(len(day), object)
    track_lk = np.empty(len(day), object)

    ## empty theta
    for i, j in enumerate(day):
        theta[i] = alpha + T.dot(state[i], beta.T)
        p_vec[i] = invlogit(theta[i])
        # Data likelihood
        track_lk[i] = pm.Dirichlet('track_lk_%d' % i, a=p_vec[i],
                                   shape=len(mtag[0]), observed=mtag[i])

with per_model:
    # start = pm.find_MAP()
    step = pm.Metropolis()
    nsteps = 1000
    trace = pm.sample(nsteps, step)
def buildGaussMixture1DModel(halos, ngauss, modeltype='ratio'):
    parts = {}

    ### PDF handling
    massnorm = 1e15
    masses = halos[0]['masses']
    nmasses = len(masses)
    nclusters = len(halos)

    delta_masses = np.zeros((nclusters, nmasses - 1))
    delta_mls = np.zeros((nclusters, nmasses))
    pdfs = np.zeros((nclusters, nmasses))

    # also need to collect some statistics, to init mixture model
    pdfmeans = np.zeros(nclusters)
    pdfwidths = np.zeros(nclusters)

    for i in range(nclusters):
        if modeltype == 'additive':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / massnorm
            delta_mls[i, :] = (masses - halos[i]['true_mass']) / massnorm
            # preserve unitarity under integration
            pdfs[i, :] = halos[i]['pdf'] * massnorm
        elif modeltype == 'ratio':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / halos[i]['true_mass']
            delta_mls[i, :] = masses / halos[i]['true_mass']
            pdfs[i, :] = halos[i]['pdf'] * halos[i]['true_mass']

        pdfmeans[i] = scipy.integrate.trapz(delta_mls[i, :] * pdfs[i, :],
                                            delta_mls[i, :])
        pdfwidths[i] = np.sqrt(scipy.integrate.trapz(
            pdfs[i, :] * (delta_mls[i, :] - pdfmeans[i])**2, delta_mls[i, :]))

    datacenter = np.mean(pdfmeans)
    dataspread = np.std(pdfmeans)
    datatypvar = np.mean(pdfwidths)
    dataminsamp = np.min(delta_masses)

    print(datacenter, dataspread, datatypvar, dataminsamp)

    #### Mixture model priors
    piprior = pymc.Dirichlet('piprior', np.ones(ngauss))
    parts['piprior'] = piprior

    mu0 = pymc.Uninformative(
        'mu0', datacenter + np.random.uniform(-5 * dataspread, 5 * dataspread))
    parts['mu0'] = mu0

    # kelly07 xvars prior.
    # w2 = pymc.Uniform('w2', 0.1/dataspread**2., 100*max(1./dataspread**2, 1./datatypvar**2))
    # print w2.parents
    # parts['w2'] = w2
    #
    # xvars = pymc.InverseGamma('xvars', 0.5, 0.5*w2, size=ngauss+1)

    # dropping the 1/2 factor on w2, because I don't think it matters
    logxsigma = pymc.Uniform('logxsigma', np.log(2 * dataminsamp),
                             np.log(5 * dataspread), size=ngauss + 1)
    parts['logxsigma'] = logxsigma

    @pymc.deterministic(trace=False)
    def xvars(logxsigma=logxsigma):
        return np.exp(logxsigma)**2
    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars=xvars):
        return 1. / xvars[-1]
    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size=ngauss)
    parts['xmus'] = xmus

    @pymc.observed
    def data(value=0., delta_mls=delta_mls, delta_masses=delta_masses,
             pdfs=pdfs, piprior=piprior, xmus=xmus, xvars=xvars):
        # complete pi
        pis = pymc.extend_dirichlet(piprior)
        # print pis

        # #enforce identifiability by ranking means
        # for i in range(xmus.shape[0]-1):
        #     if (xmus[i] >= xmus[i+1:]).any():
        #         raise pymc.ZeroProbability
        #     return

        return dlntools.pdfGaussMix1D(delta_mls=delta_mls,
                                      delta_masses=delta_masses,
                                      pdfs=pdfs, pis=pis, mus=xmus,
                                      tau2=xvars[:-1])
    parts['data'] = data

    return parts
beta = var_params[i * 2 + 1]
var_obs.append(
    pymc.Gamma("var_obs{}".format(i), alpha=alpha, beta=beta,
               value=means[i], observed=True))
var_pred.append(pymc.Gamma("var_pred{}".format(i), alpha=alpha, beta=beta))

probs = [trans_params[i * n_states + j] for j in range(n_states)]

@pymc.deterministic
def params(probs=probs):
    return np.array(probs)

trans_obs.append(
    pymc.Dirichlet("trans_obs{}".format(i), params,
                   value=transitions[i], observed=True))
trans_pred.append(pymc.Dirichlet("trans_pred{}".format(i), params))

pred = mean_pred[:]
pred.extend(var_pred)
pred.extend(trans_pred)

model = pymc.Model(pred)
M = pymc.MCMC(model)
M.sample(1000, 200, 10)
M.db.close()
def _create_parameter_model(self, database, initial_parameters):
    """
    Creates a set of stochastics representing the set of all parameters
    for all models.

    Arguments
    ---------
    database : dict
        FreeSolv database
    initial_parameters : dict
        The set of initial values of the parameters

    Returns
    -------
    parameters : dict
        PyMC dictionary containing the parameters to sample.
    """
    parameters = dict()  # just the parameters
    parameters['gbmodel_dir'] = pymc.Dirichlet('gbmodel_dir',
                                               np.ones([self.ngbmodels]))
    parameters['gbmodel_prior'] = pymc.CompletedDirichlet(
        'gbmodel_prior', parameters['gbmodel_dir'])
    if self.ngbmodels == 5:
        parameters['gbmodel'] = pymc.Categorical(
            'gbmodel', value=4, p=parameters['gbmodel_prior'])
    else:
        parameters['gbmodel'] = pymc.Categorical(
            'gbmodel', p=parameters['gbmodel_prior'])

    uninformative_tau = 0.0001
    joint_proposal_sets = {}
    for (key, value) in initial_parameters.items():
        (atomtype, parameter_name) = key.split('_')
        if parameter_name == 'scalingFactor':
            stochastic = pymc.Uniform(key, value=value, lower=-0.8, upper=+1.5)
        elif parameter_name == 'radius':
            stochastic = pymc.Uniform(key, value=value, lower=0.5, upper=2.5)
        elif parameter_name == 'alpha':
            stochastic = pymc.Normal(key, value=value, mu=value,
                                     tau=uninformative_tau)
        elif parameter_name == 'beta':
            stochastic = pymc.Normal(key, value=value, mu=value,
                                     tau=uninformative_tau)
        elif parameter_name == 'gamma':
            stochastic = pymc.Normal(key, value=value, mu=value,
                                     tau=uninformative_tau)
        else:
            raise Exception("Unrecognized parameter name: %s" % parameter_name)
        parameters[key] = stochastic
        self.stochastics_joint_proposal.append(stochastic)
    return parameters
alpha_vector = alpha

# Matrix of inter-group correlations
# DIMENSIONS: num_groups x num_groups
# SUPPORT: [0,1]
# DISTRIBUTION: None
B_matrix = B

#---------------------------- Prior Parameters ---------------------------#

# Actual group membership probabilities for each person
# DIMENSIONS: 1 x (num_people * num_groups)
# SUPPORT: (0,1], elements of each vector should sum to 1 for each person
# DISTRIBUTION: Dirichlet(alpha)
pi_list = np.empty(num_people, dtype=object)
for person in range(num_people):
    person_pi = pymc.Dirichlet('pi_%i' % person, theta=alpha_vector)
    pi_list[person] = person_pi
completed_pi_list = [
    pymc.CompletedDirichlet('completed_pi_%d' % i, dist)
    for i, dist in enumerate(pi_list)
]

# Indicator variables of whether the pth person is in a group or not
# DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1
# DOMAIN: {0,1}, only one element of vector is 1, all else 0
# DISTRIBUTION: Categorical (using Multinomial with 1 observation)
z_pTq_matrix = np.empty([num_people, num_people], dtype=object)
z_pFq_matrix = np.empty([num_people, num_people], dtype=object)
for p_person in range(num_people):
    for q_person in range(num_people):