def test_mixture_of_mvn(self):
    mu1 = np.asarray([0., 1.])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1., 0.])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[.5, .5], mu1, mu2])
    with Model() as model:
        w = Dirichlet('w', floatX(np.ones(2)), transform=None)
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture('x_obs', w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
                             st.multivariate_normal.logpdf(obs, mu2, cov2))).T
    complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
    assert_allclose(complogp, complogp_st)

    # check logp of mixture
    testpoint = model.test_point
    mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
                           axis=-1, keepdims=True)
    assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(x=testpoint['w'], alpha=np.ones(2))
    assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
def doMCMC(n, nxx, nxy, nyy, x):
    # Optional setting for reproducibility
    use_seed = False
    d = nxx.shape[0]
    ns = 2000
    if use_seed:
        seed = 42

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(nxx)
    NXY = shared(nxy)
    NYY = shared(nyy)

    # Define model and perform MCMC sampling
    with Model() as model:
        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta', logp,
                            observed={'xtx': NXX, 'xty': NXY, 'yty': NYY})

        # Inference
        print('doMCMC: start NUTS')
        step = NUTS()
        if use_seed:
            trace = sample(ns, step, progressbar=True, random_seed=seed)
        else:
            trace = sample(ns, step, progressbar=True)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
def doADVI(n, xx, xy, yy, x):
    d = xx.shape[0]
    ns = 5000
    seed = 42  # for reproducibility

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(xx)
    NXY = shared(xy)
    NYY = shared(yy)

    # Define model and perform ADVI
    with Model() as model:
        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta', logp,
                            observed={'xtx': NXX, 'xty': NXY, 'yty': NYY})

        # Inference
        v_params = advi(n=ns, random_seed=seed)
        trace = sample_vp(v_params, draws=ns, random_seed=seed)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
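# Hypothetical usage sketch for doMCMC/doADVI above (all data made up): both
# functions take the sufficient statistics of a linear regression, i.e. X'X,
# X'y and y'y, which is exactly what the custom logp consumes via
# b'(X'X)b - 2 b'(X'y) + y'y.
import numpy as np

X = np.random.randn(100, 3)                      # n observations, d features
true_b = np.array([1.0, -2.0, 0.5])
y = X.dot(true_b) + 0.1 * np.random.randn(100)

nxx = X.T.dot(X)                                 # X'X, shape (d, d)
nxy = X.T.dot(y)                                 # X'y, shape (d,)
nyy = y.dot(y)                                   # y'y, scalar
pred_mcmc = doMCMC(len(y), nxx, nxy, nyy, X)
pred_advi = doADVI(len(y), nxx, nxy, nyy, X)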
def run_mv_model(data, K=3, n_feats=2, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples = len(data)
        tau = pm.Deterministic('tau', pm.floatX(tt.eye(n_feats) * 10))
        mus = 0. if mus is None else mus
        mus = MvNormal('mus', mu=mus, tau=tau, shape=(K, n_feats))
        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], tau=tt.eye(n_feats),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'tau'])
    plt.title('mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    return model, mod, trace
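# Hypothetical call to run_mv_model above on made-up 2-D data with three
# well-separated clusters; `mod` is the modal cluster assignment per point
# over the last quarter of the trace.
import numpy as np

centers = np.array([[0., 0.], [5., 5.], [-5., 5.]])
data = np.vstack([np.random.randn(50, 2) + c for c in centers])
model, mod, trace = run_mv_model(data, K=3, n_feats=2, mc_samples=2000)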
        ]
        return tt.sum(
            logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

    return logp_


# Sparse model with diagonal covariance:
with pm.Model() as model:
    # Weights of each component:
    w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components,))

    # Impose a sparse structure on the means: all off-diagonal elements share
    # the same background value, because the background should be the same
    # throughout.
    mus_signal = MvNormal('mus_signal',
                          mu=pm.floatX(signalMean_priorMean),
                          tau=pm.floatX(np.eye(n_dimensions) /
                                        signalMean_priorSD**2),
                          shape=n_dimensions)
    mus_background = MvNormal('mus_background',
                              mu=pm.floatX(backgroundMean_priorMean),
                              tau=pm.floatX(np.eye(n_dimensions) /
                                            backgroundMean_priorSD**2),
                              shape=n_dimensions)
    mus = tt.fill_diagonal(
        tt.reshape(tt.tile(mus_background, n_components),
                   (n_components, n_dimensions)),
        0) + tt.eye(n_components, n_dimensions) * mus_signal

    # Impose structure on the covariance as well, with off-diagonal elements
    # set to zero, simply because that model is easier to fit.
    sigmas_signal = pm.Gamma('sigmas_signal',
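# Tiny NumPy illustration (made-up values) of the mean structure built above,
# assuming n_components == n_dimensions: row k is the shared background mean
# with its k-th entry swapped for the k-th signal mean.
import numpy as np

n = 3
mus_background = np.array([10., 20., 30.])
mus_signal = np.array([1., 2., 3.])

tiled = np.tile(mus_background, n).reshape(n, n)
np.fill_diagonal(tiled, 0)
mus = tiled + np.eye(n) * mus_signal
# mus == [[ 1., 20., 30.],
#         [10.,  2., 30.],
#         [10., 20.,  3.]]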
n_comp = 2
concentration = 1

with pm.Model() as model:
    # Prior for covariance matrix
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1.,
    #                               sd_dist=pm.Gamma.dist(mu=2, sigma=1))
    #             for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i])
    #      for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T))
    #      for i in range(n_comp)]
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1.,
                                 sd_dist=pm.Gamma.dist(mu=2, sigma=1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))

    # Prior for means:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)),
                    tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,))
           for i in range(n_comp)]

    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)),
                   shape=(n_comp,))

    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)

# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:,0,1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000,
                            obj_optimizer=pm.adagrad(learning_rate=1e-1),
                            method='advi')
# Log pdf of a multivariate normal with mean mu and precision tau
def logp_normal(mu, tau, value):
    k = tau.shape[0]
    delta = lambda mu: value - mu
    return (-1 / 2.) * (k * tt.log(2 * np.pi) - tt.log(tt.nlinalg.det(tau)) +
                        (delta(mu).dot(tau) * delta(mu)).sum(axis=1))


# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value)
                 for i in range(n_components)]
        return tt.sum(
            logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

    return logp_


## Prior for model:
componentMean = ms + np.random.uniform(0, 5, n_dimensions)
componentTau = np.random.uniform(0, 2, n_dimensions) * np.eye(n_dimensions)

with pm.Model() as model:
    mus = MvNormal('mu', mu=pm.floatX(componentMean),
                   tau=pm.floatX(componentTau),
                   shape=(n_components, n_dimensions))
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)),
                   shape=(n_components,))
    packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2.,
                                  sd_dist=pm.HalfCauchy.dist(2.5))
                for i in range(n_components)]
    L = [pm.expand_packed_triangular(n_dimensions, packed_L[i])
         for i in range(n_components)]
    sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i], L[i].T))
              for i in range(n_components)]
    taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)]
    xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components),
                     observed=data)

with model:
    advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1))
    advi_trace = advi_fit.sample(10000)
    advi_summary = pm.summary(advi_trace)

with open("advi_summary.pickle", "wb") as pickle_out:
    pickle.dump(advi_summary, pickle_out)
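# Hedged sanity check for logp_gmix above: the same mixture log-density,
# sum_n logsumexp_k [ log pi_k + log N(x_n | mu_k, tau_k^{-1}) ], written in
# plain NumPy/SciPy so it can be compared against the Theano version on
# concrete values (the function name here is ours, not from the original code).
import numpy as np
from scipy import stats
from scipy.special import logsumexp as np_logsumexp

def mixture_logp_np(value, mus, pi, taus):
    comp = np.stack([
        np.log(p) + stats.multivariate_normal.logpdf(value, m,
                                                     np.linalg.inv(t))
        for p, m, t in zip(pi, mus, taus)
    ])
    return np_logsumexp(comp, axis=0).sum()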
# Generate mixture data: zs are one-hot component indicators and ms the
# component means (zs, ms, rng and n_samples are assumed defined earlier).
xs = [z[:, np.newaxis] * rng.multivariate_normal(m, np.eye(2), size=n_samples)
      for z, m in zip(zs, ms)]
data = np.sum(np.dstack(xs), axis=2)

plt.figure(figsize=(5, 5))
plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5)
plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100)
plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100)

from pymc3.math import logsumexp

# Original model
with pm.Model() as model:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(2)),
                    tau=pm.floatX(0.1 * np.eye(2)), shape=(2,))
           for i in range(2)]
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,))
    xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)

# Alternative model for GMM clustering:
# with pm.Model() as model:
#     # cluster sizes
#     p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2)
#     # ensure all clusters have some points
#     p_min_potential = pm.Potential('p_min_potential',
#                                    tt.switch(tt.min(p) < .1, -np.inf, 0))
#