def _test(self, sess, x_data, n_minibatch, x_val=None, is_file=False):
  mu = Normal(mu=0.0, sigma=1.0)
  if n_minibatch is None:
    x = Normal(mu=tf.ones(10) * mu, sigma=1.0)
  else:
    x = Normal(mu=tf.ones(n_minibatch) * mu, sigma=1.0)

  qmu = Normal(mu=tf.Variable(tf.random_normal([])), sigma=tf.constant(1.0))

  data = {x: x_data}
  inference = ed.MFVI({mu: qmu}, data)
  inference.initialize(n_minibatch=n_minibatch)

  init = tf.initialize_all_variables()
  init.run()

  # Start input enqueue threads.
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  if x_val is not None:
    # Placeholder setting.
    # Check data is same as data fed to it.
    feed_dict = {inference.data[x]: x_val}
    # avoid directly fetching placeholder
    data_id = [tf.identity(v) for v in six.itervalues(inference.data)]
    val = sess.run(data_id, feed_dict)
    assert np.all(val == x_val)
  elif is_file:
    # File reader setting.
    # Check data varies by session run.
    val = sess.run(inference.data[x])
    val_1 = sess.run(inference.data[x])
    assert not np.all(val == val_1)
  elif n_minibatch is None:
    # Preloaded full setting.
    # Check data is full data.
    val = sess.run(inference.data[x])
    assert np.all(val == data[x])
  elif n_minibatch == 1:
    # Preloaded batch setting, with n_minibatch=1.
    # Check data is randomly shuffled.
    assert not np.all([sess.run(inference.data)[x] == data[x][i]
                       for i in range(10)])
  else:
    # Preloaded batch setting.
    # Check data is randomly shuffled.
    val = sess.run(inference.data)
    assert not np.all(val[x] == data[x][:n_minibatch])
    # Check data varies by session run.
    val_1 = sess.run(inference.data)
    assert not np.all(val[x] == val_1[x])

  inference.finalize()
  coord.request_stop()
  coord.join(threads)
def main():
  data = ed.Data(np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]))
  model = BetaBernoulli()
  variational = Variational()
  variational.add(Beta())

  # mean-field variational inference
  inference = ed.MFVI(model, variational, data)
  inference.run(n_iter=10000)
def _test(self, sess, data, n_minibatch, x=None, is_file=False):
  model = NormalModel()
  variational = Variational()
  variational.add(Normal())

  inference = ed.MFVI(model, variational, data)
  inference.initialize(n_minibatch=n_minibatch)

  if x is not None:
    # Placeholder setting.
    # Check data is same as data fed to it.
    feed_dict = {inference.data['x']: x}
    # avoid directly fetching placeholder
    data_id = {k: tf.identity(v) for k, v in six.iteritems(inference.data)}
    val = sess.run(data_id, feed_dict)
    assert np.all(val['x'] == x)
  elif is_file:
    # File reader setting.
    # Check data varies by session run.
    val = sess.run(inference.data)
    val_1 = sess.run(inference.data)
    assert not np.all(val['x'] == val_1['x'])
  elif n_minibatch is None:
    # Preloaded full setting.
    # Check data is full data.
    val = sess.run(inference.data)
    assert np.all(val['x'] == data['x'])
  elif n_minibatch == 1:
    # Preloaded batch setting, with n_minibatch=1.
    # Check data is randomly shuffled.
    assert not np.all([sess.run(inference.data)['x'] == data['x'][i]
                       for i in range(10)])
  else:
    # Preloaded batch setting.
    # Check data is randomly shuffled.
    val = sess.run(inference.data)
    assert not np.all(val['x'] == data['x'][:n_minibatch])
    # Check data varies by session run.
    val_1 = sess.run(inference.data)
    assert not np.all(val['x'] == val_1['x'])

  inference.finalize()
Variational model
  Likelihood: Mean-field Beta
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Beta

ed.set_seed(42)

# DATA
x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

# MODEL
p = Beta(a=1.0, b=1.0)
x = Bernoulli(p=tf.ones(10) * p)

# INFERENCE
qp_a = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp_b = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp = Beta(a=qp_a, b=qp_b)

data = {x: x_data}
inference = ed.MFVI({p: qp}, data)
inference.run(n_iter=500)
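# Minimal sketch (not part of the original example): after inference.run()
# finishes, the fitted variational parameters can be read off the shared
# session. This assumes the Beta variational factor exposes its a and b
# parameters as attributes, the same way the Normal factors elsewhere in these
# examples expose mu and sigma.
sess = ed.get_session()
a, b = sess.run([qp.a, qp.b])
print("Fitted q(p) = Beta(a={:.3f}, b={:.3f}); posterior mean {:.3f}".format(
    a, b, a / (a + b)))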
inference.run(). Alternatively, we directly access the TensorFlow session and
manipulate various objects during inference.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Normal

ed.set_seed(42)

# MODEL
z = Normal(mu=1.0, sigma=1.0)

# INFERENCE
qz = Normal(mu=tf.Variable(tf.random_normal([])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

inference = ed.MFVI({z: qz})
inference.initialize(n_iter=250)

init = tf.initialize_all_variables()
init.run()

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
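# Hedged sketch (not part of the original example): because we drive the loop
# ourselves rather than calling inference.run(), we can evaluate arbitrary
# TensorFlow fetches alongside inference. This reads the fitted variational
# parameters from the shared session, using only objects defined above plus
# ed.get_session(), which the other examples here already use.
sess = ed.get_session()
qz_mu, qz_sigma = sess.run([qz.mu, qz.sigma])
print("Fitted q(z) = Normal(mu={:.3f}, sigma={:.3f})".format(qz_mu, qz_sigma))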
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qb_mu = tf.Variable(tf.random_normal([]))
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))

qw = Normal(mu=qw_mu, sigma=qw_sigma)
qb = Normal(mu=qb_mu, sigma=qb_sigma)

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

sess = ed.get_session()
data = {'x': x_train, 'y': y_train}
inference = ed.MFVI({'w': qw, 'b': qb}, data, model)
inference.initialize(n_print=5, n_iter=600)

init = tf.initialize_all_variables()
init.run()

for t in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  if t % inference.n_print == 0:
    # Sample functions from variational model
    w_mean, w_std = sess.run([qw.mu, qw.sigma])
    b_mean, b_std = sess.run([qb.mu, qb.sigma])
    rs = np.random.RandomState(0)
    ws = (rs.randn(1, 10) * w_std + w_mean).astype(np.float32)
ed.set_seed(42)

x_train, y_train = build_toy_dataset()

model = BayesianNN(layer_sizes=[1, 10, 10, 1], nonlinearity=rbf)

qw = []
qb = []
for l in range(model.n_layers):
  m, n = model.weight_dims[l]
  qw_mu = tf.Variable(tf.random_normal([m, n]))
  qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([m, n])))
  qb_mu = tf.Variable(tf.random_normal([n]))
  qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([n])))
  qw += [Normal(mu=qw_mu, sigma=qw_sigma)]
  qb += [Normal(mu=qb_mu, sigma=qb_sigma)]

data = {'x': x_train, 'y': y_train}
inference = ed.MFVI({'w0': qw[0], 'b0': qb[0],
                     'w1': qw[1], 'b1': qb[1],
                     'w2': qw[2], 'b2': qb[2]},
                    data, model)
inference.run()
K = 2
D = 2

model = MixtureGaussian(K, D)

qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K])))
qmu_mu = tf.Variable(tf.random_normal([K * D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))

qpi = Dirichlet(alpha=qpi_alpha)
qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)
qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta)

data = {'x': x_train}
inference = ed.MFVI({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model)
inference.run(n_iter=2500, n_samples=10, n_minibatch=20)

# Average per-cluster and per-data point likelihood over many posterior samples.
log_liks = []
for s in range(100):
  zrep = {'pi': qpi.sample(()),
          'mu': qmu.sample(()),
          'sigma': qsigma.sample(())}
  log_liks += [model.predict(data, zrep)]

log_liks = tf.reduce_mean(log_liks, 0)

# Choose the cluster with the highest likelihood for each data point.
clusters = tf.argmax(log_liks, 0).eval()

plt.scatter(x_train[:, 0], x_train[:, 1], c=clusters, cmap=cm.bwr)
model = BayesianNN(layer_sizes=[1, 10, 10, 1], nonlinearity=rbf)

qz_mu = tf.Variable(tf.random_normal([model.n_vars]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([model.n_vars])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

sess = ed.get_session()
data = {'x': x_train, 'y': y_train}
inference = ed.MFVI({'z': qz}, data, model)
inference.initialize()

init = tf.initialize_all_variables()
init.run()

for t in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  if t % inference.n_print == 0:
    # Sample functions from variational model
    mean, std = sess.run([qz.mu, qz.sigma])
    rs = np.random.RandomState(0)
    zs = rs.randn(10, model.n_vars) * std + mean
    zs = tf.convert_to_tensor(zs, dtype=tf.float32)
b_2 = Normal(mu=tf.zeros(1), sigma=tf.ones(1))

x = tf.convert_to_tensor(x_train, dtype=tf.float32)
y = Normal(mu=neural_network(x), sigma=0.1 * tf.ones(N))

# INFERENCE
qW_0 = Normal(mu=tf.Variable(tf.random_normal([D, 10])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, 10]))))
qW_1 = Normal(mu=tf.Variable(tf.random_normal([10, 10])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([10, 10]))))
qW_2 = Normal(mu=tf.Variable(tf.random_normal([10, 1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([10, 1]))))
qb_0 = Normal(mu=tf.Variable(tf.random_normal([10])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([10]))))
qb_1 = Normal(mu=tf.Variable(tf.random_normal([10])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([10]))))
qb_2 = Normal(mu=tf.Variable(tf.random_normal([1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {y: y_train}
inference = ed.MFVI({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2}, data)
inference.run()
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Normal

ed.set_seed(42)

# Normal-Normal with known variance
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

qmu_mu = tf.Variable(tf.random_normal([]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)

data = {x: np.array([0.0] * 50, dtype=np.float32)}

# analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
inference = ed.MFVI({mu: qmu}, data)
inference.run()
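# The analytic solution quoted above is standard Normal-Normal conjugacy:
# with prior N(0, 1), likelihood N(x | mu, 1), and n = 50 observations all
# equal to 0, the posterior is N(0, 1/51). A quick check of that arithmetic
# (a sketch added here; it uses nothing beyond NumPy):
n = 50
x_bar = 0.0
post_var = 1.0 / (1.0 / 1.0 + n / 1.0)  # 1 / (1/prior_var + n/lik_var) = 1/51
post_mu = post_var * (0.0 / 1.0 + n * x_bar / 1.0)  # precision-weighted mean = 0.0
print(post_mu, np.sqrt(post_var))  # 0.0 0.140...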
  qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([n])))

  w += [Normal(mu=w_mu, sigma=w_sigma)]
  b += [Normal(mu=b_mu, sigma=b_sigma)]
  qw += [Normal(mu=qw_mu, sigma=qw_sigma)]
  qb += [Normal(mu=qb_mu, sigma=qb_sigma)]
  latent_vars[w[i - 1]] = qw[i - 1]
  latent_vars[b[i - 1]] = qb[i - 1]

x = tf.convert_to_tensor(trainx, dtype=tf.float32)
y = Categorical(neural_network(x, w, b))

# INFERENCE
data = {y: trainy}
inference = ed.MFVI(latent_vars, data)

#%%
# Sample functions from variational model to then evaluate predictions
x_ = tf.placeholder(tf.float32, shape=[None, n_D])
y_ = tf.placeholder(tf.float32, shape=[None, n_out])

n_posterior_samples = args.n_posterior_samples

# Monte Carlo estimate of the mean of the posterior predictive
mus = []
for _ in range(n_posterior_samples):
  qw_sample = []
  qb_sample = []
  for i in range(1, len(N)):
    qw_sample += [qw[i - 1].sample()]
Probability model
  Posterior: (1-dimensional) Bernoulli
Variational model
  Likelihood: Mean-field Bernoulli
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Bernoulli, PointMass
from edward.stats import bernoulli


class BernoulliPosterior:
  """p(x, z) = p(z) = p(z | x) = Bernoulli(z; p)"""
  def log_prob(self, xs, zs):
    return bernoulli.logpmf(zs['p'], p=0.6)


ed.set_seed(42)

model = BernoulliPosterior()

qp_p = tf.nn.sigmoid(tf.Variable(tf.random_normal([])))
qp = Bernoulli(p=qp_p)

inference = ed.MFVI({'p': qp}, model_wrapper=model)
inference.run(n_iter=100, n_samples=5, n_print=10)
# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
z = Normal(mu=tf.zeros([N_MINIBATCH, d]), sigma=tf.ones([N_MINIBATCH, d]))
logits = generative_network(z.value())
x = Bernoulli(logits=logits)

# INFERENCE
x_ph = ed.placeholder(tf.float32, [N_MINIBATCH, 28 * 28])
mu, sigma = inference_network(x_ph)
qz = Normal(mu=mu, sigma=sigma)

# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.MFVI({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

init = tf.initialize_all_variables()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
  avg_loss = 0.0

  widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
  pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
  pbar.start()
  for t in range(n_iter_per_epoch):
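    # Hedged sketch of the inner step (not part of the original excerpt): feed
    # an MNIST minibatch into x_ph at each iteration. This assumes
    # inference.update accepts a feed_dict argument and returns the loss under
    # the 'loss' key; if this version does not, the same feed_dict can instead
    # be passed to sess.run on inference.train, as in the regression example
    # further down.
    x_batch, _ = mnist.train.next_batch(N_MINIBATCH)
    info_dict = inference.update(feed_dict={x_ph: x_batch})
    avg_loss += info_dict['loss']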
ed.set_seed(42)

model = LinearModel()
variational = Variational()
variational.add(Normal(model.n_vars))
data = build_toy_dataset()

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

sess = ed.get_session()
inference = ed.MFVI(model, variational, data)
inference.initialize(n_samples=5, n_print=5)
for t in range(250):
  loss = inference.update()
  if t % inference.n_print == 0:
    print("iter {:d} loss {:.2f}".format(t, loss))

    # Sample functions from variational model
    mean, std = sess.run([variational.layers[0].loc,
                          variational.layers[0].scale])
    rs = np.random.RandomState(0)
    zs = rs.randn(10, variational.n_vars) * std + mean
    zs = tf.convert_to_tensor(zs, dtype=tf.float32)
    inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
    x = tf.expand_dims(inputs, 1)
    W = tf.expand_dims(zs[:, 0], 0)
class BetaBernoulli:
  """p(x, p) = Bernoulli(x | p) * Beta(p | 1, 1)"""
  def log_prob(self, xs, zs):
    log_prior = beta.logpdf(zs['p'], a=1.0, b=1.0)
    log_lik = tf.reduce_sum(bernoulli.logpmf(xs['x'], p=zs['p']))
    return log_lik + log_prior

  def sample_likelihood(self, zs):
    """x | p ~ p(x | p)"""
    return {'x': bernoulli.sample(p=tf.ones(10) * zs['p'])}


def T(xs, zs):
  return tf.reduce_mean(tf.cast(xs['x'], tf.float32))


ed.set_seed(42)

data = {'x': np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])}

model = BetaBernoulli()

qp_a = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp_b = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp = Beta(a=qp_a, b=qp_b)

inference = ed.MFVI({'p': qp}, data, model)
inference.run(n_iter=200)

print(ed.ppc(T, data, latent_vars={'p': qp}, model_wrapper=model))
qb_mu = qb_mu - tf.reduce_mean(qb_mu)
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([Db, 1])))
qb = Normal(mu=qb_mu, sigma=qb_sigma)

zs = {'beta': qw, 'b': qb, 'Intercept': qi}

Xnew = ed.placeholder(tf.float32, shape=(None, D))
Znew = ed.placeholder(tf.float32, shape=(None, Db))
ynew = ed.placeholder(tf.float32, shape=(None,))
data = {'X': Xnew, 'y': ynew, 'Z': Znew}

edmodel = MixedModel(lik_std=10.0, prior_std=100.0)

sess = ed.get_session()
inference = ed.MFVI(zs, data, edmodel)
# optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
# optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
optimizer = tf.train.GradientDescentOptimizer(0.001)
inference.initialize(optimizer=optimizer, n_samples=10)

init = tf.initialize_all_variables()
init.run()

NEPOCH = 5000
train_loss = np.zeros(NEPOCH)
test_loss = np.zeros(NEPOCH)
batch_xs, batch_zs, batch_ys = x_train, Z, y_train
#!/usr/bin/env python
"""Normal posterior.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Normal
from edward.stats import norm


class NormalPosterior:
  """p(x, z) = p(z) = p(z | x) = Normal(z; mu, sigma)"""
  def log_prob(self, xs, zs):
    return norm.logpdf(zs['z'], 1.0, 1.0)


ed.set_seed(42)

model = NormalPosterior()

qz_mu = tf.Variable(tf.random_normal([]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)

inference = ed.MFVI({'z': qz}, model_wrapper=model)
inference.run(n_iter=250)
                     np.linspace(6, 8, num=N / 2)])
  y = 5.0 * X + norm.rvs(0, noise_std, size=N)
  X = X.reshape((N, 1))
  return X.astype(np.float32), y.astype(np.float32)


N = 40  # num data points
p = 1  # num features

ed.set_seed(42)

X_data, y_data = build_toy_dataset(N)
X = X_data

beta = Normal(mu=tf.zeros(p), sigma=tf.ones(p))
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N))

qmu_mu = tf.Variable(tf.random_normal([p]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([p])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma)

data = {y: y_data}
inference = ed.MFVI({beta: qbeta}, data)
inference.initialize()

sess = ed.get_session()
for t in range(501):
  _, loss = sess.run([inference.train, inference.loss])
  inference.print_progress(t, loss)
x = tf.convert_to_tensor(x_train, dtype=tf.float32)
y = Normal(mu=neural_network(x, W_0, W_1, b_0, b_1), sigma=0.1 * tf.ones(N))

# INFERENCE
qW_0 = Normal(mu=tf.Variable(tf.random_normal([D, 2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, 2]))))
qW_1 = Normal(mu=tf.Variable(tf.random_normal([2, 1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2, 1]))))
qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2]))))
qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {y: y_train}
inference = ed.MFVI({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data)

# Sample functions from variational model to visualize fits.
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(tf.constant(inputs), 1)
mus = []
for s in range(10):
  mus += [neural_network(x, qW_0.sample(), qW_1.sample(),
                         qb_0.sample(), qb_1.sample())]

mus = tf.pack(mus)

sess = ed.get_session()
D = 10  # num features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = ed.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.MFVI({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
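# Further sanity check (a hedged sketch added here, reusing only calls that
# already appear above: qw.mean(), qb.mean(), and ed.get_session()): compare
# the posterior means against the true coefficients used to simulate the data.
sess = ed.get_session()
print("True coefficients:", coeff)
print("Posterior mean of w:", sess.run(qw.mean()))
print("Posterior mean of b:", sess.run(qb.mean()))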