def _test(alpha, beta, n):
  rv = InverseGamma(alpha=alpha, beta=beta)
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  alpha = alpha.eval()
  beta = beta.eval()
  assert np.allclose(rv.log_prob(x_tf).eval(),
                     stats.invgamma.logpdf(x, alpha, scale=beta))
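# A minimal, assumed driver for the `_test` helper above (not part of the
# original test file): it opens a default TensorFlow session so the .eval()
# calls have a graph to run against, then checks a couple of parameter
# settings. The specific constants are illustrative only.
import numpy as np
import tensorflow as tf
from scipy import stats
from edward.models import InverseGamma

if __name__ == '__main__':
  with tf.Session().as_default():
    _test(tf.constant(1.0), tf.constant(1.0), [100])
    _test(tf.constant([0.5, 1.5]), tf.constant([1.0, 2.0]), [100])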
def __init__(self, K, D, N, nu, use_param=False):
  self.K = K  # number of topics
  self.D = D  # number of documents
  self.N = N  # number of words of each document
  self.nu = nu
  self.alpha = alpha = tf.zeros([K]) + 0.1
  self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
  self.sigma = sigma = tf.sqrt(self.sigmasq)
  self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
  self.theta = theta = [None] * D
  self.z = z = [None] * D
  self.w = w = [None] * D
  for d in range(D):
    theta[d] = Dirichlet(alpha)
    if use_param:
      w[d] = ParamMixture(mixing_weights=theta[d],
                          component_params={'loc': mu, 'scale_diag': sigma},
                          component_dist=MultivariateNormalDiag,
                          sample_shape=N[d])
      z[d] = w[d].cat
    else:
      z[d] = Categorical(probs=theta[d], sample_shape=N[d])
      components = [
          MultivariateNormalDiag(loc=tf.gather(mu, k),
                                 scale_diag=tf.gather(self.sigma, k),
                                 sample_shape=N[d])
          for k in range(K)
      ]
      w[d] = Mixture(cat=z[d], components=components, sample_shape=N[d])
def __init__(self, num_data, num_cluster, vector_dim, num_mcmc_sample):
  self.K = num_cluster
  self.D = vector_dim
  self.N = num_data
  self.pi = Dirichlet(tf.ones(self.K))
  self.mu = Normal(tf.zeros(self.D), tf.ones(self.D), sample_shape=self.K)
  self.sigmasq = InverseGamma(tf.ones(self.D), tf.ones(self.D),
                              sample_shape=self.K)
  self.x = ParamMixture(self.pi,
                        {'loc': self.mu, 'scale_diag': tf.sqrt(self.sigmasq)},
                        MultivariateNormalDiag,
                        sample_shape=self.N)
  self.z = self.x.cat

  self.T = num_mcmc_sample  # number of MCMC samples
  self.qpi = Empirical(tf.Variable(tf.ones([self.T, self.K]) / self.K))
  self.qmu = Empirical(tf.Variable(tf.zeros([self.T, self.K, self.D])))
  self.qsigmasq = Empirical(tf.Variable(tf.ones([self.T, self.K, self.D])))
  self.qz = Empirical(tf.Variable(tf.zeros([self.T, self.N], dtype=tf.int32)))
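# A sketched continuation (assumed, not part of the original class): one way
# to wire these latent variables and Empirical approximations into Edward's
# Gibbs sampler, mirroring the other Gibbs examples in this collection. The
# method name `run_gibbs` and the `x_train` argument are hypothetical.
def run_gibbs(self, x_train):
  inference = ed.Gibbs({self.pi: self.qpi,
                        self.mu: self.qmu,
                        self.sigmasq: self.qsigmasq,
                        self.z: self.qz},
                       data={self.x: x_train})
  inference.run()
  return inference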
def klqp(self, docs, S, T, wordVec):
  K = self.K
  D = self.D
  nu = self.nu
  self.latent_vars = latent_vars = {}
  training_data = {}
  qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
               scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
  latent_vars[self.mu] = qmu
  qsigmasq = InverseGamma(tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))),
                          tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
  latent_vars[self.sigmasq] = qsigmasq
  for d in range(D):
    training_data[self.w[d]] = docs[d]
  self.qmu = qmu
  self.qsigma = qsigma = tf.sqrt(qsigmasq)
  self.qw = MultivariateNormalDiag(loc=qmu, scale_diag=qsigma)
  V = len(wordVec)
  logprobs = [None] * V
  for i in range(V):
    logprobs[i] = self.qw.log_prob(wordVec[i])
  self.qbeta = tf.convert_to_tensor(logprobs)
  self.inference = ed.KLqp(latent_vars, data=training_data)
  self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
  self.__run_inference__(T)
def main(_):
  # Data generation (known mean)
  xn_data = np.random.normal(FLAGS.loc, FLAGS.scale, FLAGS.N)
  print("scale: {}".format(FLAGS.scale))

  # Prior definition
  alpha = 0.5
  beta = 0.7

  # Posterior inference
  # Probabilistic model
  ig = InverseGamma(alpha, beta)
  xn = Normal(FLAGS.loc, tf.sqrt(ig), sample_shape=FLAGS.N)

  # Inference
  qig = Empirical(params=tf.get_variable(
      "qig/params", [1000],
      initializer=tf.constant_initializer(0.5)))
  proposal_ig = InverseGamma(2.0, 2.0)
  inference = ed.MetropolisHastings({ig: qig}, {ig: proposal_ig},
                                    data={xn: xn_data})
  inference.run()

  sess = ed.get_session()
  print("Inferred scale: {}".format(sess.run(tf.sqrt(qig.mean()))))
import tensorflow as tf
from edward.models import Normal, InverseGamma, PointMass, Uniform, TransformedDistribution

# simulate data
d = 50
T = 300
X, C, S = MOU_sim(N=d, Sigma=None, mu=0, T=T, connectivity_strength=8.)

# the model
mu = tf.constant(0.)  # Normal(loc=tf.zeros([d]), scale=1.*tf.ones([d]))
beta = Normal(loc=tf.ones([d, d]), scale=2. * tf.ones([d, d]))
ds = tf.contrib.distributions
C = TransformedDistribution(distribution=beta,
                            bijector=ds.bijectors.Exp(),
                            name="LogNormalTransformedDistribution")
noise_proc = InverseGamma(concentration=tf.ones([d]), rate=tf.ones([d]))  # tf.constant(0.1)
noise_obs = tf.constant(0.1)  # InverseGamma(alpha=1.0, beta=1.0)

x = [0] * T
x[0] = Normal(loc=mu, scale=10. * tf.ones([d]))
for n in range(1, T):
  x[n] = Normal(loc=mu + tf.tensordot(C, x[n - 1], axes=[[1], [0]]),
                scale=noise_proc * tf.ones([d]))

# # map inference
# print("setting up distributions")
# qmu = PointMass(params=tf.Variable(tf.zeros([d])))
# qbeta = PointMass(params=tf.Variable(tf.zeros([d, d])))
# print("constructing inference object")
# inference = ed.MAP({beta: qbeta, mu: qmu},
#                    data={xt: xt_true for xt, xt_true in zip(x, X)})
# print("running inference")
# inference.run()
plt.title("Simulated dataset") plt.show() K = 2 D = 2 model = MixtureGaussian(K, D) qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K]))) qmu_mu = tf.Variable(tf.random_normal([K * D])) qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qpi = Dirichlet(alpha=qpi_alpha) qmu = Normal(mu=qmu_mu, sigma=qmu_sigma) qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta) data = {'x': x_train} inference = ed.KLqp({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model) inference.run(n_iter=2500, n_samples=10, n_minibatch=20) # Average per-cluster and per-data point likelihood over many posterior samples. log_liks = [] for s in range(100): zrep = {'pi': qpi.sample(()), 'mu': qmu.sample(()), 'sigma': qsigma.sample(())} log_liks += [model.predict(data, zrep)] log_liks = tf.reduce_mean(log_liks, 0)
def _test(alpha, beta, n):
  x = InverseGamma(alpha=alpha, beta=beta)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(alpha)
  assert val_est == val_true
true_pi = np.array([0.2, 0.3, 0.5], np.float32)
N = 10000

K = len(true_mu)
true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(true_sigmasq[true_z])

# Prior hyperparameters
pi_alpha = np.ones(K, dtype=np.float32)
mu_sigma = np.std(true_mu)
sigmasq_alpha = 1.0
sigmasq_beta = 2.0

# Model
pi = Dirichlet(pi_alpha)
mu = Normal(0.0, mu_sigma, sample_shape=K)
sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
x = ParamMixture(pi, {'mu': mu, 'sigma': tf.sqrt(sigmasq)}, Normal,
                 sample_shape=N)
z = x.cat

# Conditionals
mu_cond = ed.complete_conditional(mu)
sigmasq_cond = ed.complete_conditional(sigmasq)
pi_cond = ed.complete_conditional(pi)
z_cond = ed.complete_conditional(z)

sess = ed.get_session()
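# A minimal sketch (assuming x_data and initial draws pi_est, mu_est,
# sigmasq_est, z_est are available, as in the fuller Gibbs example below) of
# one way to iterate these complete conditionals:
cond_dict = {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est, z: z_est, x: x_data}
for _ in range(500):
  z_est = sess.run(z_cond, cond_dict)
  cond_dict[z] = z_est
  pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
  cond_dict[pi] = pi_est
  cond_dict[mu] = mu_est
  sigmasq_est = sess.run(sigmasq_cond, cond_dict)
  cond_dict[sigmasq] = sigmasq_est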
    k = np.argmax(np.random.multinomial(1, pi))
    x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
  return x


N = 500  # number of data points
K = 2  # number of components
D = 2  # dimensionality of data

ed.set_seed(42)
x_train = build_toy_dataset(N)

pi = Dirichlet(tf.ones(K))
mu = Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
sigmasq = InverseGamma(tf.ones(D), tf.ones(D), sample_shape=K)
x = ParamMixture(pi, {'loc': mu, 'scale_diag': tf.sqrt(sigmasq)},
                 MultivariateNormalDiag, sample_shape=N)
z = x.cat

T = 500  # number of MCMC samples
qpi = Empirical(
    tf.get_variable("qpi/params", [T, K],
                    initializer=tf.constant_initializer(1.0 / K)))
qmu = Empirical(
    tf.get_variable("qmu/params", [T, K, D],
                    initializer=tf.zeros_initializer()))
qsigmasq = Empirical(
    tf.get_variable("qsigmasq/params", [T, K, D],
                    initializer=tf.ones_initializer()))
import edward as ed
from edward.models import Normal, InverseGamma
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

N = 1000
D = np.random.normal(loc=3., scale=2., size=N)

p_mu = Normal(0., 1.)
p_s = InverseGamma(1., 1.)  # https://en.wikipedia.org/wiki/Inverse-gamma_distribution
ed_normal = Normal(loc=p_mu, scale=p_s, sample_shape=N)

q1 = Normal(loc=tf.get_variable("mu", []), scale=1.0)
q2 = Normal(loc=tf.nn.softplus(tf.get_variable("sd", [])), scale=1.0)

inference = ed.KLqp(latent_vars={p_mu: q1, p_s: q2}, data={ed_normal: D})
inference.run(n_iter=10000)

print(np.mean(D))
print(np.std(D))

plt.hist2d(q1.sample(10000).eval(), q2.sample(10000).eval(), bins=200)
plt.show()
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import InverseGamma, Normal

N = 1000

# Data generation (known mean)
mu = 7.0
sigma = 0.55
xn_data = np.random.normal(mu, sigma, N)
print('sigma={}'.format(sigma))

# Prior definition
alpha = tf.Variable(0.9, dtype=tf.float32, trainable=False)
beta = tf.Variable(0.5, dtype=tf.float32, trainable=False)

# Posterior inference
# Probabilistic model
ig = InverseGamma(alpha, beta)
xn = Normal(mu, tf.ones([N]) * tf.sqrt(ig))

# Variational model
qig = InverseGamma(tf.nn.softplus(tf.Variable(tf.random_normal([]))),
                   tf.nn.softplus(tf.Variable(tf.random_normal([]))))

# Inference
inference = ed.KLqp({ig: qig}, data={xn: xn_data})
inference.run(n_iter=2000, n_samples=150)

sess = ed.get_session()
print('Inferred sigma={}'.format(sess.run(tf.sqrt(qig.mean()))))
def main(_):
  # Generate data
  true_mu = np.array([-1.0, 0.0, 1.0], np.float32) * 10
  true_sigmasq = np.array([1.0**2, 2.0**2, 3.0**2], np.float32)
  true_pi = np.array([0.2, 0.3, 0.5], np.float32)
  N = 10000
  K = len(true_mu)
  true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
  x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(true_sigmasq[true_z])

  # Prior hyperparameters
  pi_alpha = np.ones(K, dtype=np.float32)
  mu_sigma = np.std(true_mu)
  sigmasq_alpha = 1.0
  sigmasq_beta = 2.0

  # Model
  pi = Dirichlet(pi_alpha)
  mu = Normal(0.0, mu_sigma, sample_shape=K)
  sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
  x = ParamMixture(pi, {'loc': mu, 'scale': tf.sqrt(sigmasq)}, Normal,
                   sample_shape=N)
  z = x.cat

  # Conditionals
  mu_cond = ed.complete_conditional(mu)
  sigmasq_cond = ed.complete_conditional(sigmasq)
  pi_cond = ed.complete_conditional(pi)
  z_cond = ed.complete_conditional(z)

  sess = ed.get_session()

  # Initialize randomly
  pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])

  print('Initial parameters:')
  print('pi:', pi_est)
  print('mu:', mu_est)
  print('sigmasq:', sigmasq_est)
  print()

  # Gibbs sampler
  cond_dict = {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est,
               z: z_est, x: x_data}
  t0 = time()
  T = 500
  for t in range(T):
    z_est = sess.run(z_cond, cond_dict)
    cond_dict[z] = z_est
    pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
    cond_dict[pi] = pi_est
    cond_dict[mu] = mu_est
    sigmasq_est = sess.run(sigmasq_cond, cond_dict)
    cond_dict[sigmasq] = sigmasq_est

  print('took %.3f seconds to run %d iterations' % (time() - t0, T))
  print()
  print('Final sample for parameters:')
  print('pi:', pi_est)
  print('mu:', mu_est)
  print('sigmasq:', sigmasq_est)
  print()
  print()
  print('True parameters:')
  print('pi:', true_pi)
  print('mu:', true_mu)
  print('sigmasq:', true_sigmasq)
  print()

  plt.figure(figsize=[10, 10])
  plt.subplot(2, 1, 1)
  plt.hist(x_data, 50)
  plt.title('Empirical Distribution of $x$')
  plt.xlabel('$x$')
  plt.ylabel('frequency')
  xl = plt.xlim()
  plt.subplot(2, 1, 2)
  plt.hist(sess.run(x, {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est}), 50)
  plt.title("Predictive distribution $p(x \mid \mathrm{inferred }\ "
            "\pi, \mu, \sigma^2)$")
  plt.xlabel('$x$')
  plt.ylabel('frequency')
  plt.xlim(xl)
  plt.show()
def main(_):
  ed.set_seed(42)

  # DATA
  x_data = build_toy_dataset(FLAGS.N)

  # MODEL
  pi = Dirichlet(concentration=tf.ones(FLAGS.K))
  mu = Normal(0.0, 1.0, sample_shape=[FLAGS.K, FLAGS.D])
  sigma = InverseGamma(concentration=1.0, rate=1.0,
                       sample_shape=[FLAGS.K, FLAGS.D])
  c = Categorical(logits=tf.log(pi) - tf.log(1.0 - pi), sample_shape=FLAGS.N)
  x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

  # INFERENCE
  qpi = Empirical(params=tf.get_variable(
      "qpi/params", [FLAGS.T, FLAGS.K],
      initializer=tf.constant_initializer(1.0 / FLAGS.K)))
  qmu = Empirical(params=tf.get_variable(
      "qmu/params", [FLAGS.T, FLAGS.K, FLAGS.D],
      initializer=tf.zeros_initializer()))
  qsigma = Empirical(params=tf.get_variable(
      "qsigma/params", [FLAGS.T, FLAGS.K, FLAGS.D],
      initializer=tf.ones_initializer()))
  qc = Empirical(params=tf.get_variable(
      "qc/params", [FLAGS.T, FLAGS.N],
      initializer=tf.zeros_initializer(),
      dtype=tf.int32))

  gpi = Dirichlet(concentration=tf.constant([1.4, 1.6]))
  gmu = Normal(loc=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
               scale=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
  gsigma = InverseGamma(concentration=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
                        rate=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
  gc = Categorical(logits=tf.zeros([FLAGS.N, FLAGS.K]))

  inference = ed.MetropolisHastings(
      latent_vars={pi: qpi, mu: qmu, sigma: qsigma, c: qc},
      proposal_vars={pi: gpi, mu: gmu, sigma: gsigma, c: gc},
      data={x: x_data})

  inference.initialize()

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    t = info_dict['t']
    if t == 1 or t % inference.n_print == 0:
      qpi_mean, qmu_mean = sess.run([qpi.mean(), qmu.mean()])
      print("")
      print("Inferred membership probabilities:")
      print(qpi_mean)
      print("Inferred cluster means:")
      print(qmu_mean)
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import InverseGamma, Normal, Empirical

N = 1000

# Data generation (known mean)
loc = 7.0
scale = 0.7
xn_data = np.random.normal(loc, scale, N)
print('scale={}'.format(scale))

# Prior definition
alpha = tf.Variable(0.5, trainable=False)
beta = tf.Variable(0.7, trainable=False)

# Posterior inference
# Probabilistic model
ig = InverseGamma(alpha, beta)
xn = Normal(loc, tf.ones([N]) * tf.sqrt(ig))

# Inference
qig = Empirical(params=tf.Variable(tf.zeros(1000) + 0.5))
proposal_ig = InverseGamma(2.0, 2.0)
inference = ed.MetropolisHastings({ig: qig}, {ig: proposal_ig},
                                  data={xn: xn_data})
inference.run()

sess = ed.get_session()
print('Inferred scale={}'.format(sess.run(tf.sqrt(qig.mean()))))
  return x


N = 500  # num data points
K = 2  # num components
D = 2  # dimensionality of data

ed.set_seed(42)

# DATA
x_data = build_toy_dataset(N)

# MODEL
pi = Dirichlet(alpha=tf.constant([1.0] * K))
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
c = Categorical(logits=ed.tile(ed.logit(pi), [N, 1]))
x = Normal(mu=tf.gather(mu, c), sigma=tf.gather(sigma, c))

# INFERENCE
T = 5000
qpi = Empirical(params=tf.Variable(tf.ones([T, K]) / K))
qmu = Empirical(params=tf.Variable(tf.zeros([T, K, D])))
qsigma = Empirical(params=tf.Variable(tf.ones([T, K, D])))
qc = Empirical(params=tf.Variable(tf.zeros([T, N], dtype=tf.int32)))

gpi = Dirichlet(alpha=tf.constant([1.4, 1.6]))
gmu = Normal(mu=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
             sigma=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
gsigma = InverseGamma(alpha=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
                      beta=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
for name in sorted(train.keys(), key=lambda x: int(x.replace('Sky', ''))):
  (gal, hal) = train[name]
  Galaxy_Pos.append(gal[:nb_datapoints, :2])
  Galaxy_E.append(gal[:nb_datapoints, 2:])
  Halos_Pos.append(hal[3:3 + nb_components * 2].reshape(nb_components, 2))
print("Galaxy (X, Y):", len(Galaxy_Pos), Galaxy_Pos[0].shape)
print("Galaxy (E1, E2):", len(Galaxy_E), Galaxy_E[0].shape)
print("Halos (X, Y):", len(Halos_Pos), Halos_Pos[0].shape)

# ===========================================================================
# Create the model
# ===========================================================================
# latent variable z
mu = Normal(mu=tf.zeros([nb_components, nb_features]),
            sigma=tf.ones([nb_components, nb_features]))
sigma = InverseGamma(alpha=tf.ones([nb_components, nb_features]),
                     beta=tf.ones([nb_components, nb_features]))
cat = Categorical(logits=tf.zeros([nb_datapoints, nb_components]))
components = [
    MultivariateNormalDiag(mu=tf.ones([nb_datapoints, 1]) * mu[k],
                           diag_stdev=tf.ones([nb_datapoints, 1]) * sigma[k])
    for k in range(nb_components)
]
x = Mixture(cat=cat, components=components)

# ====== inference ====== #
qmu = Normal(mu=tf.Variable(tf.random_normal([nb_components, nb_features])),
             sigma=tf.nn.softplus(
                 tf.Variable(tf.zeros([nb_components, nb_features]))))
qsigma = InverseGamma(
    alpha=tf.nn.softplus(
        tf.Variable(tf.random_normal([nb_components, nb_features]))),
    beta=tf.nn.softplus(
        tf.Variable(tf.random_normal([nb_components, nb_features]))))
# x_train, y_train = X_cent, Y
x_train, z_train, y_train = X1.astype('float32'), Z.astype('float32'), Y.flatten()
D = x_train.shape[1]  # num features
Db = z_train.shape[1]

# MODEL
Wf = Normal(mu=tf.zeros([D]), sigma=tf.ones([D]))
Wb = Normal(mu=tf.zeros([Db]), sigma=tf.ones([Db]))
Ib = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
Xnew = tf.placeholder(tf.float32, shape=(None, D))
Znew = tf.placeholder(tf.float32, shape=(None, Db))
ynew = tf.placeholder(tf.float32, shape=(None, ))

sigma2 = InverseGamma(alpha=tf.ones(1), beta=tf.ones(1))
y = Normal(mu=ed.dot(x_train, Wf) + ed.dot(z_train, Wb) + Ib, sigma=sigma2)

# INFERENCE
sess = ed.get_session()
T = 10000
qi = Empirical(params=tf.Variable(tf.zeros([T, 1])))
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
qb = Empirical(params=tf.Variable(tf.zeros([T, Db])))
qsigma2 = Empirical(params=tf.Variable(tf.ones([T, 1])))

inference = ed.SGHMC({
    Wf: qw,
    Wb: qb,
    Ib: qi,
    sigma2: qsigma2
  return x


N = 500  # number of data points
K = 2  # number of components
D = 2  # dimensionality of data

ed.set_seed(42)

# DATA
x_data = build_toy_dataset(N)

# MODEL
pi = Dirichlet(concentration=tf.constant([1.0] * K))
mu = Normal(loc=tf.zeros([K, D]), scale=tf.ones([K, D]))
sigma = InverseGamma(concentration=tf.ones([K, D]), rate=tf.ones([K, D]))
c = Categorical(logits=tf.tile(tf.reshape(ed.logit(pi), [1, K]), [N, 1]))
x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

# INFERENCE
T = 5000
qpi = Empirical(params=tf.Variable(tf.ones([T, K]) / K))
qmu = Empirical(params=tf.Variable(tf.zeros([T, K, D])))
qsigma = Empirical(params=tf.Variable(tf.ones([T, K, D])))
qc = Empirical(params=tf.Variable(tf.zeros([T, N], dtype=tf.int32)))

gpi = Dirichlet(concentration=tf.constant([1.4, 1.6]))
gmu = Normal(loc=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
             scale=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
gsigma = InverseGamma(concentration=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
                      rate=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
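# A sketched continuation (assumed, mirroring the FLAGS-based variant of this
# example in this collection): a proposal for `c` plus the Metropolis-Hastings
# wiring of the Empirical approximations and proposal distributions.
gc = Categorical(logits=tf.zeros([N, K]))

inference = ed.MetropolisHastings(
    latent_vars={pi: qpi, mu: qmu, sigma: qsigma, c: qc},
    proposal_vars={pi: gpi, mu: gmu, sigma: gsigma, c: gc},
    data={x: x_data})
inference.run()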
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import InverseGamma, Normal, Empirical

N = 1000

# Data generation (known mean)
mu = 7.0
sigma = 0.7
xn_data = np.random.normal(mu, sigma, N)
print('sigma={}'.format(sigma))

# Prior definition
alpha = tf.Variable(0.5, dtype=tf.float32, trainable=False)
beta = tf.Variable(0.7, dtype=tf.float32, trainable=False)

# Posterior inference
# Probabilistic model
ig = InverseGamma(alpha=alpha, beta=beta)
xn = Normal(mu=mu, sigma=tf.ones([N]) * tf.sqrt(ig))

# Inference
qig = Empirical(params=tf.Variable(tf.zeros(1000) + 0.5))
proposal_ig = InverseGamma(alpha=2.0, beta=2.0)
inference = ed.MetropolisHastings({ig: qig}, {ig: proposal_ig},
                                  data={xn: xn_data})
inference.run()

sess = ed.get_session()
print('Inferred sigma={}'.format(sess.run(tf.sqrt(qig.mean()))))
    k = np.argmax(np.random.multinomial(1, pi))
    x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
  return x


N = 1000  # Number of data points
K = 2  # Number of components
D = 2  # Dimensionality of data

# DATA
x_data = build_toy_dataset(N)

# MODEL
pi = Dirichlet(concentration=tf.constant([1.0] * K))
mu = Normal(loc=tf.zeros([K, D]), scale=tf.ones([K, D]))
sigma = InverseGamma(concentration=tf.ones([K, D]), rate=tf.ones([K, D]))
c = Categorical(logits=tf.tile(tf.reshape(ed.logit(pi), [1, K]), [N, 1]))
x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

# INFERENCE
qpi = Dirichlet(
    concentration=tf.nn.softplus(tf.Variable(tf.random_normal([K]))))
qmu = Normal(loc=tf.Variable(tf.random_normal([K, D])),
             scale=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))))
qsigma = InverseGamma(
    concentration=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))),
    rate=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))))
qc = Categorical(logits=tf.Variable(tf.zeros([N, K])))

inference = ed.KLqp(latent_vars={
# x_train, y_train = X_cent, Y
x_train, z_train, y_train = X1.astype('float32'), Z.astype('float32'), Y.flatten()
D = x_train.shape[1]  # num features
Db = z_train.shape[1]

# MODEL
Wf = Normal(loc=tf.zeros([D]), scale=tf.ones([D]))
Wb = Normal(loc=tf.zeros([Db]), scale=tf.ones([Db]))
Ib = Normal(loc=tf.zeros(1), scale=tf.ones(1))
Xnew = tf.placeholder(tf.float32, shape=(None, D))
Znew = tf.placeholder(tf.float32, shape=(None, Db))
ynew = tf.placeholder(tf.float32, shape=(None, ))

sigma2 = InverseGamma(concentration=tf.ones(1) * .1, rate=tf.ones(1) * .1)
# sigma2 = Normal(loc=tf.zeros([1]), scale=tf.ones([1]) * 100)
# Use the standard deviation (sqrt of the inverse-gamma variance) as the
# likelihood scale; tf.log(sigma2) can be negative and is not a valid scale.
y = Normal(loc=ed.dot(x_train, Wf) + ed.dot(z_train, Wb) + Ib,
           scale=tf.sqrt(sigma2))

# INFERENCE
sess = ed.get_session()
T = 10000
qi = Empirical(params=tf.Variable(tf.zeros([T, 1])))
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
qb = Empirical(params=tf.Variable(tf.zeros([T, Db])))
qsigma2 = Empirical(params=tf.Variable(tf.ones([T, 1])))

inference = ed.SGHMC({
    Wf: qw,
    Wb: qb,
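# The call above is cut off in the original snippet; a hedged sketch of the
# usual completion (the data binding `{y: y_train}` and the step size are
# assumptions, mirroring the Empirical/SGHMC pattern in the lines above):
inference = ed.SGHMC({Wf: qw, Wb: qb, Ib: qi, sigma2: qsigma2},
                     data={y: y_train})
inference.run(step_size=1e-3)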
plt.title("Simulated dataset") plt.show() K = 2 D = 2 model = MixtureGaussian(K, D) qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K]))) qmu_mu = tf.Variable(tf.random_normal([K * D])) qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qpi = Dirichlet(alpha=qpi_alpha) qmu = Normal(mu=qmu_mu, sigma=qmu_sigma) qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta) data = {'x': x_train} inference = ed.KLqp({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model) inference.run(n_iter=2500, n_samples=10, n_minibatch=20) # Average per-cluster and per-data point likelihood over many posterior samples. log_liks = [] for s in range(100): zrep = { 'pi': qpi.sample(()), 'mu': qmu.sample(()), 'sigma': qsigma.sample(()) } log_liks += [model.predict(data, zrep)]
true_sigma = np.array([1.0, 1.0, 1.0], np.float32)
true_pi = np.array([.2, .3, .5], np.float32)

K = len(true_pi)
true_z = np.random.choice(np.arange(K), p=true_pi, size=N)
x = true_mu[true_z] + np.random.randn(N) * true_sigma[true_z]
# plt.hist(x, bins=200)
# plt.show()

# We'd like to compute the posterior p(\theta | x), where
# \theta = [mu_1, ..., mu_3, sigma_1, ..., sigma_3, z_1, ..., z_3].

# Model
pi = Dirichlet(np.ones(K, np.float32))
mu = Normal(0.0, 9.0, sample_shape=[K])
sigma = InverseGamma(1.0, 1.0, sample_shape=[K])
c = Categorical(logits=tf.log(pi) - tf.log(1.0 - pi), sample_shape=N)
ed_x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

# parameters
q_pi = Dirichlet(
    tf.nn.softplus(
        tf.get_variable("qpi", [K],
                        initializer=tf.constant_initializer(1.0 / K))))
q_mu = Normal(loc=tf.get_variable("qmu", [K]), scale=1.0)
q_sigma = Normal(loc=tf.nn.softplus(tf.get_variable("qsigma", [K])),
                 scale=1.0)

inference = ed.KLqp(latent_vars={
    mu: q_mu,
    sigma: q_sigma
D = train_img.shape[1]
T = 800  # number of MCMC samples
M = 300  # number of posterior samples sampled

ed.set_seed(67)

with tf.name_scope("model"):
  pi = Dirichlet(concentration=tf.constant([1.0] * K, name="pi/weights"),
                 name="pi")
  mu = Normal(loc=tf.zeros(D, name="centroids/loc"),
              scale=tf.ones(D, name="centroids/scale"),
              sample_shape=K,
              name="centroids")
  sigma = InverseGamma(
      concentration=tf.ones(D, name="variability/concentration"),
      rate=tf.ones(D, name="variability/rate"),
      sample_shape=K,
      name="variability")
  x = ParamMixture(pi, {'loc': mu, 'scale_diag': tf.sqrt(sigma)},
                   MultivariateNormalDiag,
                   sample_shape=N,
                   name="mixture")
  z = x.cat

with tf.name_scope("posterior"):
  qpi = Empirical(
      tf.get_variable("qpi/params", [T, K],
N = 500  # number of data points
K = 2  # number of components
D = 2  # dimensionality of data

ed.set_seed(42)

# DATA
x_train = build_toy_dataset(N)

plt.scatter(x_train[:, 0], x_train[:, 1])
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

# MODEL
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
cat = Categorical(logits=tf.zeros([N, K]))
components = [
    MultivariateNormalDiag(mu=tf.ones([N, 1]) * tf.gather(mu, k),
                           diag_stdev=tf.ones([N, 1]) * tf.gather(sigma, k))
    for k in range(K)
]
x = Mixture(cat=cat, components=components)

# INFERENCE
qmu = Normal(mu=tf.Variable(tf.random_normal([K, D])),
             sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
qsigma = InverseGamma(
    alpha=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))),
    beta=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))))
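# A sketched continuation (assumed): binding the variational factors to the
# mixture model with KLqp, in the same style as the other KLqp examples in
# this collection. The iteration counts are illustrative only.
inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train})
inference.run(n_iter=4000, n_samples=20)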
def __init__(self, n, xdim, n_mixtures=5, mc_samples=500):
  # Compute the shape dynamically from placeholders
  self.x_ph = tf.placeholder(tf.float32, [None, xdim])
  self.k = k = n_mixtures
  self.batch_size = n
  self.d = d = xdim
  self.sample_size = tf.placeholder(tf.int32, ())

  # Build the priors over membership probabilities and mixture parameters
  with tf.variable_scope("priors"):
    pi = Dirichlet(tf.ones(k))
    mu = Normal(tf.zeros(d), tf.ones(d), sample_shape=k)
    sigmasq = InverseGamma(tf.ones(d), tf.ones(d), sample_shape=k)

  # Build the conditional mixture model
  with tf.variable_scope("likelihood"):
    x = ParamMixture(pi, {'loc': mu, 'scale_diag': tf.sqrt(sigmasq)},
                     MultivariateNormalDiag, sample_shape=n)
    z = x.cat

  # Build approximate posteriors as Empirical samples
  t = mc_samples
  with tf.variable_scope("posteriors_samples"):
    qpi = Empirical(tf.get_variable(
        "qpi/params", [t, k],
        initializer=tf.constant_initializer(1.0 / k)))
    qmu = Empirical(tf.get_variable(
        "qmu/params", [t, k, d],
        initializer=tf.zeros_initializer()))
    qsigmasq = Empirical(tf.get_variable(
        "qsigmasq/params", [t, k, d],
        initializer=tf.ones_initializer()))
    qz = Empirical(tf.get_variable(
        "qz/params", [t, n],
        initializer=tf.zeros_initializer(),
        dtype=tf.int32))

  # Build inference graph using Gibbs and conditionals
  with tf.variable_scope("inference"):
    self.inference = ed.Gibbs({pi: qpi, mu: qmu, sigmasq: qsigmasq, z: qz},
                              data={x: self.x_ph})
    self.inference.initialize()

  # Build predictive posterior graph by taking samples
  n_samples = self.sample_size
  with tf.variable_scope("posterior"):
    mu_smpl = qmu.sample(n_samples)  # shape: [1, 100, k, d]
    sigmasq_smpl = qsigmasq.sample(n_samples)

    x_post = Normal(loc=tf.ones((n, 1, 1, 1)) * mu_smpl,
                    scale=tf.ones((n, 1, 1, 1)) * tf.sqrt(sigmasq_smpl))
    # NOTE: x_ph has shape [n, d]
    x_broadcasted = tf.tile(tf.reshape(self.x_ph, (n, 1, 1, d)),
                            (1, n_samples, k, 1))

    x_ll = x_post.log_prob(x_broadcasted)
    x_ll = tf.reduce_sum(x_ll, axis=3)
    x_ll = tf.reduce_mean(x_ll, axis=1)

  self.sample_t_ph = tf.placeholder(tf.int32, ())
  self.eval_ops = {
      'generative_post': x_post,
      'qmu': qmu,
      'qsigma': qsigmasq,
      'post_running_mu': tf.reduce_mean(qmu.params[:self.sample_t_ph],
                                        axis=0),
      'post_log_prob': x_ll
  }
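# A hypothetical usage sketch (the class name `GaussianMixtureGibbs` and the
# array `x_train` are assumptions, not from the original code): feed data
# through the placeholder and advance the Gibbs sampler with Edward's
# update() API, which accepts a feed_dict.
model = GaussianMixtureGibbs(n=x_train.shape[0], xdim=x_train.shape[1])
for _ in range(model.inference.n_iter):
  info_dict = model.inference.update(feed_dict={model.x_ph: x_train})
  model.inference.print_progress(info_dict)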