def test_getitem(self):
  with self.test_session() as sess:
    x = Normal(tf.zeros([3, 4]), tf.ones([3, 4]))
    z = x[0:2, 2:3]
    z_value = x.value()[0:2, 2:3]
    z_eval, z_value_eval = sess.run([z, z_value])
    self.assertAllEqual(z_eval, z_value_eval)
def test_neg(self):
  with self.test_session() as sess:
    x = Normal(0.0, 1.0)
    z = -x
    z_value = -x.value()
    z_eval, z_value_eval = sess.run([z, z_value])
    self.assertAllEqual(z_eval, z_value_eval)
def _test_normal_normal(self, Inference, default, *args, **kwargs):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    if not default:
      qmu_loc = tf.Variable(tf.random_normal([]))
      qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
      qmu = Normal(loc=qmu_loc, scale=qmu_scale)

      # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
      inference = Inference({mu: qmu}, data={x: x_data})
    else:
      inference = Inference([mu], data={x: x_data})
      qmu = inference.latent_vars[mu]

    inference.run(*args, **kwargs)

    self.assertAllClose(qmu.mean().eval(), 0, rtol=0.1, atol=0.6)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=0.15, atol=0.5)

    variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer')
    old_t, old_variables = sess.run([inference.t, variables])
    self.assertEqual(old_t, inference.n_iter)
    sess.run(inference.reset)
    new_t, new_variables = sess.run([inference.t, variables])
    self.assertEqual(new_t, 0)
    self.assertNotEqual(old_variables, new_variables)
def build_update(self):
  """Simulate Langevin dynamics using a discretized integrator. Its
  discretization error goes to zero as the learning rate decreases.

  #### Notes

  The updates assume each Empirical random variable is directly
  parameterized by `tf.Variable`s.
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}

  # Simulate Langevin dynamics.
  learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
  grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                list(six.itervalues(old_sample)))
  sample = {}
  for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
    qz = self.latent_vars[z]
    event_shape = qz.event_shape
    normal = Normal(loc=tf.zeros(event_shape),
                    scale=learning_rate * tf.ones(event_shape))
    sample[z] = old_sample[z] + \
        0.5 * learning_rate * tf.convert_to_tensor(grad_log_p) + \
        normal.sample()

  # Update Empirical random variables.
  assign_ops = []
  for z, qz in six.iteritems(self.latent_vars):
    variable = qz.get_variables()[0]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

  # Increment n_accept.
  assign_ops.append(self.n_accept.assign_add(1))
  return tf.group(*assign_ops)
class AutoRegressive(RandomVariable, Distribution):
  # a 1-D AR(1) process
  # a[t + 1] = a * a[t] + eps with eps ~ N(0, sig**2)
  def __init__(self, T, a, sig, *args, **kwargs):
    self.a = a
    self.sig = sig
    self.T = T
    self.shocks = Normal(tf.zeros(T), scale=sig)
    self.z = tf.scan(lambda acc, x: self.a * acc + x, self.shocks)

    if 'dtype' not in kwargs:
      kwargs['dtype'] = tf.float32
    if 'allow_nan_stats' not in kwargs:
      kwargs['allow_nan_stats'] = False
    if 'reparameterization_type' not in kwargs:
      kwargs['reparameterization_type'] = FULLY_REPARAMETERIZED
    if 'validate_args' not in kwargs:
      kwargs['validate_args'] = False
    if 'name' not in kwargs:
      kwargs['name'] = 'AutoRegressive'

    super(AutoRegressive, self).__init__(*args, **kwargs)
    self._args = (T, a, sig)

  def _log_prob(self, value):
    err = value - self.a * tf.pad(value[:-1], [[1, 0]], 'CONSTANT')
    lpdf = self.shocks._log_prob(err)
    return tf.reduce_sum(lpdf)

  def _sample_n(self, n, seed=None):
    return tf.scan(lambda acc, x: self.a * acc + x,
                   self.shocks._sample_n(n, seed))
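# A minimal usage sketch for the AutoRegressive class above (illustrative,
# not part of the original source; the length, coefficient, and shock scale
# are assumed values): build a length-50 AR(1) variable and score an
# all-zeros path under it.
ar = AutoRegressive(50, a=0.9, sig=1.0)
path = tf.zeros(50)
print(ed.get_session().run(ar.log_prob(path)))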
def main(_):
  ed.set_seed(42)

  N = 5000  # number of data points
  D = 10  # number of features

  # DATA
  w_true = np.random.randn(D)
  X_data = np.random.randn(N, D)
  p = expit(np.dot(X_data, w_true))
  y_data = np.array([np.random.binomial(1, i) for i in p])

  # MODEL
  X = tf.placeholder(tf.float32, [N, D])
  w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
  y = Bernoulli(logits=ed.dot(X, w))

  # INFERENCE
  qw = Normal(loc=tf.get_variable("qw/loc", [D]),
              scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))

  inference = IWVI({w: qw}, data={X: X_data, y: y_data})
  inference.run(K=5, n_iter=1000)

  # CRITICISM
  print("Mean squared error in true values to inferred posterior mean:")
  print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def build_update(self):
  """Simulate Langevin dynamics using a discretized integrator. Its
  discretization error goes to zero as the learning rate decreases.
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}

  # Simulate Langevin dynamics.
  learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
  grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                list(six.itervalues(old_sample)))
  sample = {}
  for z, qz, grad_log_p in \
      zip(six.iterkeys(self.latent_vars),
          six.itervalues(self.latent_vars),
          grad_log_joint):
    event_shape = qz.get_event_shape()
    normal = Normal(mu=tf.zeros(event_shape),
                    sigma=learning_rate * tf.ones(event_shape))
    sample[z] = old_sample[z] + 0.5 * learning_rate * grad_log_p + \
        normal.sample()

  # Update Empirical random variables.
  assign_ops = []
  variables = {x.name: x for x in
               tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
  for z, qz in six.iteritems(self.latent_vars):
    variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

  # Increment n_accept.
  assign_ops.append(self.n_accept.assign_add(1))
  return tf.group(*assign_ops)
def test_abs(self):
  with self.test_session() as sess:
    x = Normal(0.0, 1.0)
    z = abs(x)
    z_value = abs(x.value())
    z_eval, z_value_eval = sess.run([z, z_value])
    self.assertAllEqual(z_eval, z_value_eval)
def test_div(self):
  with self.test_session() as sess:
    x = Normal(0.0, 1.0)
    y = 5.0
    z = x / y
    z_value = x.value() / y
    z_eval, z_value_eval = sess.run([z, z_value])
    self.assertAllEqual(z_eval, z_value_eval)
def test_rfloordiv(self):
  with self.test_session() as sess:
    x = Normal(0.0, 1.0)
    y = 5.0
    z = y // x
    z_value = y // x.value()
    z_eval, z_value_eval = sess.run([z, z_value])
    self.assertAllEqual(z_eval, z_value_eval)
def test_dict_tensor_rv(self):
  with self.test_session():
    set_seed(95258)
    x = Normal(mu=0.0, sigma=0.1)
    y = tf.constant(1.0)
    z = x * y
    qx = Normal(mu=10.0, sigma=0.1)
    z_new = copy(z, {x.value(): qx})
    self.assertGreater(z_new.eval(), 5.0)
def _test(mu, sigma, n):
  rv = Normal(mu=mu, sigma=sigma)
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  mu = mu.eval()
  sigma = sigma.eval()
  assert np.allclose(rv.log_prob(x_tf).eval(),
                     stats.norm.logpdf(x, mu, sigma))
def test_swap_tensor_rv(self):
  with self.test_session():
    ed.set_seed(95258)
    x = Normal(0.0, 0.1)
    y = tf.constant(1.0)
    z = x * y
    qx = Normal(10.0, 0.1)
    z_new = ed.copy(z, {x.value(): qx})
    self.assertGreater(z_new.eval(), 5.0)
def test_list(self):
  with self.test_session() as sess:
    x = Normal(tf.constant(0.0), tf.constant(0.1))
    y = Normal(tf.constant(10.0), tf.constant(0.1))
    cat = Categorical(logits=tf.zeros(5))
    components = [Normal(x, tf.constant(0.1)) for _ in range(5)]
    z = Mixture(cat=cat, components=components)
    z_new = ed.copy(z, {x: y.value()})
    self.assertGreater(z_new.value().eval(), 5.0)
def build_update(self):
  """Simulate Hamiltonian dynamics using a numerical integrator.
  Correct for the integrator's discretization error using an
  acceptance ratio.

  #### Notes

  The updates assume each Empirical random variable is directly
  parameterized by `tf.Variable`s.
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}
  old_sample = OrderedDict(old_sample)

  # Sample momentum.
  old_r_sample = OrderedDict()
  for z, qz in six.iteritems(self.latent_vars):
    event_shape = qz.event_shape
    normal = Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape))
    old_r_sample[z] = normal.sample()

  # Simulate Hamiltonian dynamics.
  new_sample, new_r_sample = leapfrog(old_sample, old_r_sample,
                                      self.step_size, self._log_joint,
                                      self.n_steps)

  # Calculate acceptance ratio.
  ratio = tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                         for r in six.itervalues(old_r_sample)])
  ratio -= tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                          for r in six.itervalues(new_r_sample)])
  ratio += self._log_joint(new_sample)
  ratio -= self._log_joint(old_sample)

  # Accept or reject sample.
  u = Uniform().sample()
  accept = tf.log(u) < ratio
  sample_values = tf.cond(accept,
                          lambda: list(six.itervalues(new_sample)),
                          lambda: list(six.itervalues(old_sample)))
  if not isinstance(sample_values, list):
    # `tf.cond` returns tf.Tensor if output is a list of size 1.
    sample_values = [sample_values]

  sample = {z: sample_value for z, sample_value in
            zip(six.iterkeys(new_sample), sample_values)}

  # Update Empirical random variables.
  assign_ops = []
  for z, qz in six.iteritems(self.latent_vars):
    variable = qz.get_variables()[0]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

  # Increment n_accept (if accepted).
  assign_ops.append(self.n_accept.assign_add(tf.where(accept, 1, 0)))
  return tf.group(*assign_ops)
def build_update(self):
  """Simulate Hamiltonian dynamics using a numerical integrator.
  Correct for the integrator's discretization error using an
  acceptance ratio.
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}

  # Sample momentum.
  old_r_sample = {}
  for z, qz in six.iteritems(self.latent_vars):
    event_shape = qz.get_event_shape()
    normal = Normal(mu=tf.zeros(event_shape), sigma=tf.ones(event_shape))
    old_r_sample[z] = normal.sample()

  # Simulate Hamiltonian dynamics.
  new_sample = old_sample
  new_r_sample = old_r_sample
  for _ in range(self.n_steps):
    new_sample, new_r_sample = leapfrog(old_sample, old_r_sample,
                                        self.step_size, self._log_joint)

  # Calculate acceptance ratio.
  ratio = tf.reduce_sum([0.5 * tf.square(r)
                         for r in six.itervalues(old_r_sample)])
  ratio -= tf.reduce_sum([0.5 * tf.square(r)
                          for r in six.itervalues(new_r_sample)])
  ratio += self._log_joint(new_sample)
  ratio -= self._log_joint(old_sample)

  # Accept or reject sample.
  u = Uniform().sample()
  accept = tf.log(u) < ratio
  sample_values = tf.cond(accept,
                          lambda: list(six.itervalues(new_sample)),
                          lambda: list(six.itervalues(old_sample)))
  if not isinstance(sample_values, list):
    # ``tf.cond`` returns tf.Tensor if output is a list of size 1.
    sample_values = [sample_values]

  sample = {z: sample_value for z, sample_value in
            zip(six.iterkeys(new_sample), sample_values)}

  # Update Empirical random variables.
  assign_ops = []
  variables = {x.name: x for x in
               tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
  for z, qz in six.iteritems(self.latent_vars):
    variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

  # Increment n_accept (if accepted).
  assign_ops.append(self.n_accept.assign_add(tf.select(accept, 1, 0)))
  return tf.group(*assign_ops)
def _test(shape, n):
  rv = Normal(shape, loc=tf.zeros(shape), scale=tf.ones(shape))
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  loc = rv.loc.eval()
  scale = rv.scale.eval()
  for idx in range(shape[0]):
    assert np.allclose(
        rv.log_prob_idx((idx, ), x_tf).eval(),
        stats.norm.logpdf(x[:, idx], loc[idx], scale[idx]))
def _test_log_prob_i(n_minibatch, num_factors):
  normal = Normal(num_factors,
                  loc=tf.constant([0.0] * num_factors),
                  scale=tf.constant([1.0] * num_factors))
  with sess.as_default():
    m = normal.loc.eval()
    s = normal.scale.eval()
    z = np.random.randn(n_minibatch, num_factors)
    for i in range(num_factors):
      assert np.allclose(
          normal.log_prob_i(i, tf.constant(z, dtype=tf.float32)).eval(),
          stats.norm.logpdf(z[:, i], m[i], s[i]))
def _test(shape, n_minibatch):
  normal = Normal(shape,
                  loc=tf.constant([0.0] * shape),
                  scale=tf.constant([1.0] * shape))
  with sess.as_default():
    m = normal.loc.eval()
    s = normal.scale.eval()
    z = np.random.randn(n_minibatch, shape)
    for i in range(shape):
      assert np.allclose(
          normal.log_prob_idx((i, ), tf.constant(z, dtype=tf.float32)).eval(),
          stats.norm.logpdf(z[:, i], m[i], s[i]))
def main(_):
  ed.set_seed(142)

  # DATA
  x_train = build_toy_dataset(FLAGS.N, FLAGS.D, FLAGS.K)

  # MODEL
  w = Normal(loc=0.0, scale=10.0, sample_shape=[FLAGS.D, FLAGS.K])
  z = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.M, FLAGS.K])
  x = Normal(loc=tf.matmul(w, z, transpose_b=True),
             scale=tf.ones([FLAGS.D, FLAGS.M]))

  # INFERENCE
  qw_variables = [tf.get_variable("qw/loc", [FLAGS.D, FLAGS.K]),
                  tf.get_variable("qw/scale", [FLAGS.D, FLAGS.K])]
  qw = Normal(loc=qw_variables[0], scale=tf.nn.softplus(qw_variables[1]))

  qz_variables = [tf.get_variable("qz/loc", [FLAGS.N, FLAGS.K]),
                  tf.get_variable("qz/scale", [FLAGS.N, FLAGS.K])]
  idx_ph = tf.placeholder(tf.int32, FLAGS.M)
  qz = Normal(loc=tf.gather(qz_variables[0], idx_ph),
              scale=tf.nn.softplus(tf.gather(qz_variables[1], idx_ph)))

  x_ph = tf.placeholder(tf.float32, [FLAGS.D, FLAGS.M])
  inference_w = ed.KLqp({w: qw}, data={x: x_ph, z: qz})
  inference_z = ed.KLqp({z: qz}, data={x: x_ph, w: qw})

  scale_factor = float(FLAGS.N) / FLAGS.M
  inference_w.initialize(scale={x: scale_factor, z: scale_factor},
                         var_list=qz_variables,
                         n_samples=5)
  inference_z.initialize(scale={x: scale_factor, z: scale_factor},
                         var_list=qw_variables,
                         n_samples=5)

  sess = ed.get_session()
  tf.global_variables_initializer().run()
  for _ in range(inference_w.n_iter):
    x_batch, idx_batch = next_batch(x_train, FLAGS.M)
    for _ in range(5):
      inference_z.update(feed_dict={x_ph: x_batch, idx_ph: idx_batch})

    info_dict = inference_w.update(feed_dict={x_ph: x_batch,
                                              idx_ph: idx_batch})
    inference_w.print_progress(info_dict)

    t = info_dict['t']
    if t % 100 == 0:
      print("\nInferred principal axes:")
      print(sess.run(qw.mean()))
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=tf.ones(50) * mu, scale=1.0)

    qmu_loc = tf.Variable(tf.random_normal([]))
    qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
    qmu = Normal(loc=qmu_loc, scale=qmu_scale)

    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    inference = ed.KLpq({mu: qmu}, data={x: x_data})
    inference.run(n_samples=25, n_iter=100)

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-1, atol=1e-1)
def test_normal_run(self):
  def ratio_estimator(data, local_vars, global_vars):
    """Use the optimal ratio estimator, r(z) = log p(z). We add a
    TensorFlow variable as the algorithm assumes that the function
    has parameters to optimize."""
    w = tf.get_variable("w", [])
    return z.log_prob(local_vars[z]) + w

  with self.test_session() as sess:
    z = Normal(loc=5.0, scale=1.0)

    qz = Normal(loc=tf.Variable(tf.random_normal([])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

    inference = ed.ImplicitKLqp({z: qz}, discriminator=ratio_estimator)
    inference.run(n_iter=200)

    self.assertAllClose(qz.mean().eval(), 5.0, atol=1.0)
def build_update(self):
  """Simulate Hamiltonian dynamics with friction using a discretized
  integrator. Its discretization error goes to zero as the learning
  rate decreases.

  Implements the update equations from (15) of Chen et al. (2014).
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}
  old_v_sample = {z: v for z, v in six.iteritems(self.v)}

  # Simulate Hamiltonian dynamics with friction.
  friction = tf.constant(self.friction, dtype=tf.float32)
  learning_rate = tf.constant(self.step_size * 0.01, dtype=tf.float32)
  grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                list(six.itervalues(old_sample)))

  # v_sample is so named b/c it represents a velocity rather than momentum.
  sample = {}
  v_sample = {}
  for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
    qz = self.latent_vars[z]
    event_shape = qz.event_shape
    normal = Normal(loc=tf.zeros(event_shape),
                    scale=(tf.sqrt(learning_rate * friction) *
                           tf.ones(event_shape)))
    sample[z] = old_sample[z] + old_v_sample[z]
    v_sample[z] = ((1. - 0.5 * friction) * old_v_sample[z] +
                   learning_rate * tf.convert_to_tensor(grad_log_p) +
                   normal.sample())

  # Update Empirical random variables.
  assign_ops = []
  for z, qz in six.iteritems(self.latent_vars):
    variable = qz.get_variables()[0]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))
    assign_ops.append(tf.assign(self.v[z], v_sample[z]).op)

  # Increment n_accept.
  assign_ops.append(self.n_accept.assign_add(1))
  return tf.group(*assign_ops)
def main(_):
  # data
  J = 8
  data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
  data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

  # model definition
  mu = Normal(0., 10.)
  logtau = Normal(5., 1.)
  theta_prime = Normal(tf.zeros(J), tf.ones(J))
  sigma = tf.placeholder(tf.float32, J)
  y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J]))

  data = {y: data_y, sigma: data_sigma}

  # ed.KLqp inference
  with tf.variable_scope('q_logtau'):
    q_logtau = Normal(tf.get_variable('loc', []),
                      tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_mu'):
    q_mu = Normal(tf.get_variable('loc', []),
                  tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_theta_prime'):
    q_theta_prime = Normal(tf.get_variable('loc', [J]),
                           tf.nn.softplus(tf.get_variable('scale', [J])))

  inference = ed.KLqp({logtau: q_logtau, mu: q_mu,
                       theta_prime: q_theta_prime}, data=data)
  inference.run(n_samples=15, n_iter=60000)

  print("==== ed.KLqp inference ====")
  print("E[mu] = %f" % (q_mu.mean().eval()))
  print("E[logtau] = %f" % (q_logtau.mean().eval()))
  print("E[theta_prime]=")
  print((q_theta_prime.mean().eval()))
  print("==== end ed.KLqp inference ====")
  print("")
  print("")

  # HMC inference
  S = 400000
  burn = S // 2

  hq_logtau = Empirical(tf.get_variable('hq_logtau', [S]))
  hq_mu = Empirical(tf.get_variable('hq_mu', [S]))
  hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J]))

  inference = ed.HMC({logtau: hq_logtau, mu: hq_mu,
                      theta_prime: hq_theta_prime}, data=data)
  inference.run()

  print("==== ed.HMC inference ====")
  print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean()))
  print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean()))
  print("E[theta_prime]=")
  print(hq_theta_prime.params.eval()[burn:, ].mean(0))
  print("==== end ed.HMC inference ====")
  print("")
  print("")
def __init__(self, T, a, sig, *args, **kwargs):
  self.a = a
  self.sig = sig
  self.T = T
  self.shocks = Normal(tf.zeros(T), scale=sig)
  self.z = tf.scan(lambda acc, x: self.a * acc + x, self.shocks)

  if 'dtype' not in kwargs:
    kwargs['dtype'] = tf.float32
  if 'allow_nan_stats' not in kwargs:
    kwargs['allow_nan_stats'] = False
  if 'reparameterization_type' not in kwargs:
    kwargs['reparameterization_type'] = FULLY_REPARAMETERIZED
  if 'validate_args' not in kwargs:
    kwargs['validate_args'] = False
  if 'name' not in kwargs:
    kwargs['name'] = 'AutoRegressive'

  super(AutoRegressive, self).__init__(*args, **kwargs)
  self._args = (T, a, sig)
import edward as ed
import random
import tensorflow as tf
from edward.models import Normal

# Import the load_data helper module used to download the CIFAR-10 data.
from tensorflow.python.keras._impl.keras.datasets.cifar10 import load_data

# Download the CIFAR-10 data and load it.
(x_train, y_train), (x_test, y_test) = load_data()

# parameters
N = 256  # number of images in a minibatch.

# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [None, 32, 32, 3])

# Normal(0,1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
w1 = Normal(loc=tf.zeros([3, 3, 3, 32]), scale=tf.ones([3, 3, 3, 32]))
l1 = tf.nn.conv2d(x, w1, strides=[1, 1, 1, 1], padding='SAME')
l1 = tf.nn.relu(l1)
l1 = tf.nn.max_pool(l1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')

w2 = Normal(loc=tf.zeros([3, 3, 32, 64]), scale=tf.ones([3, 3, 32, 64]))
l2 = tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME')
l2 = tf.nn.relu(l2)
l2 = tf.nn.max_pool(l2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                    padding='SAME')
from __future__ import print_function

import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from edward.models import Empirical, Normal

ed.set_seed(42)

# DATA
x_data = np.array([0.0] * 50)

# MODEL: Normal-Normal with known variance
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

# INFERENCE
qmu = Empirical(params=tf.Variable(tf.zeros([1000])))
proposal_mu = Normal(mu=0.0, sigma=tf.sqrt(1.0 / 51.0))

# analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
inference = ed.MetropolisHastings({mu: qmu},
                                  {mu: proposal_mu},
                                  data={x: x_data})
inference.run()

# CRITICISM
# Check convergence with visual diagnostics.
sess = ed.get_session()
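# A hedged sketch of the visual check mentioned above (not in the original
# source): pull the chain of sampled means out of the Empirical
# approximation and plot its trace.
trace = sess.run(qmu.params)
plt.plot(trace)
plt.title("Trace of mu")
plt.show()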
      mat[i] += [multivariate_rbf(xi, xj)]

    mat[i] = tf.pack(mat[i])

  return tf.pack(mat)


ed.set_seed(42)

# DATA
df = np.loadtxt('data/crabs_train.txt', dtype='float32', delimiter=',')
df[df[:, 0] == -1, 0] = 0  # replace -1 label with 0 label
N = 25  # number of data points
D = df.shape[1] - 1  # number of features
subset = np.random.choice(df.shape[0], N, replace=False)
X_train = df[subset, 1:]
y_train = df[subset, 0]

# MODEL
X = tf.placeholder(tf.float32, [N, D])
f = MultivariateNormalFull(mu=tf.zeros(N), sigma=kernel(X))
y = Bernoulli(logits=f)

# INFERENCE
qf = Normal(mu=tf.Variable(tf.random_normal([N])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({f: qf}, data)
inference.run(n_iter=500)
from edward.models import Normal
from sklearn import datasets
from sklearn.model_selection import train_test_split

x_data, y_data = datasets.load_boston(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                    test_size=0.5,
                                                    random_state=42)
len_train = len(x_train)
len_test = len(x_test)

features = 13
x = tf.placeholder(tf.float32, [None, features])
w = Normal(loc=tf.zeros(features), scale=tf.ones(features))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(x, w) + b, scale=tf.ones(len_train))

qw = Normal(loc=tf.get_variable("qw/loc", [features]),
            scale=tf.nn.softplus(tf.get_variable("qw/scale", [features])))
qb = Normal(loc=tf.get_variable("qb/loc", [1]),
            scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

inference = ed.KLpq({w: qw, b: qb}, data={x: x_train, y: y_train})
inference.run(n_samples=506, n_iter=250)

y_post = ed.copy(y, {w: qw, b: qb})

print("Mean squared error on training data:")
print(ed.evaluate('mean_squared_error', data={x: x_train, y_post: y_train}))
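# A hedged follow-up (not in the original script): with the 50/50 split
# above, the train and test halves have the same size, so the same graph
# can also score the held-out data.
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={x: x_test, y_post: y_test}))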
period_pre = tf.Variable(np.log(np.exp(7.0 * len_init) - 1), dtype=tf.float32)
period_len_pre = tf.Variable(1.0)
period_var_pre = tf.Variable(np.log(np.exp(0.5) - 1), dtype=tf.float32)
# period = tf.nn.softplus(period_pre)
period_length = tf.nn.softplus(period_len_pre)

Kuu = kernelfx(xu, xu)

fu_loc = tf.zeros((p, m))
fu_scale = tf.cast(tf.cholesky(Kuu + offset * tf.eye(m, dtype=tf.float64),
                               name='fu_scale'), dtype=tf.float32)

u = MultivariateNormalTriL(loc=fu_loc, scale_tril=fu_scale, name='pu')

x_var = Normal(loc=tf.zeros((M, Q)), scale=1.0, name='x_var')

idx_ph = tf.placeholder(tf.int32, M)
z = tf.constant(z_init, dtype=tf.float32)
x = tf.concat([x_var, tf.gather(z, idx_ph)], 1, name='x')
print(x.shape)

Kfu = kernelfx(x, xu)

Kff = kernelfx(x, x)

Kuuinv = tf.matrix_inverse(Kuu + offset * tf.eye(m, dtype=tf.float64))
KfuKuuinv = tf.matmul(Kfu, Kuuinv)
KffKuuinvU = [
def test_repeat_vector(self):
  x = Normal(loc=tf.zeros([100, 10]), scale=tf.ones([100, 10]))
  y = layers.RepeatVector(2)(x.value())
  with self.test_session():
    self.assertEqual(y.eval().shape, (100, 2, 10))
def build_update(self):
  """Simulate Hamiltonian dynamics using a numerical integrator.
  Correct for the integrator's discretization error using an
  acceptance ratio.

  #### Notes

  The updates assume each Empirical random variable is directly
  parameterized by `tf.Variable`s.
  """
  old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                for z, qz in six.iteritems(self.latent_vars)}
  old_sample = OrderedDict(old_sample)

  # Sample momentum.
  old_r_sample = OrderedDict()
  for z, qz in six.iteritems(self.latent_vars):
    event_shape = qz.event_shape
    normal = Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape))
    old_r_sample[z] = normal.sample()

  # Simulate Hamiltonian dynamics.
  new_sample, new_r_sample = leapfrog(old_sample, old_r_sample,
                                      self.step_size, self._log_joint,
                                      self.n_steps)

  # Calculate acceptance ratio.
  ratio = tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                         for r in six.itervalues(old_r_sample)])
  ratio -= tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                          for r in six.itervalues(new_r_sample)])
  ratio += self._log_joint(new_sample)
  ratio -= self._log_joint(old_sample)

  # Accept or reject sample.
  u = Uniform().sample()
  accept = tf.log(u) < ratio
  sample_values = tf.cond(accept,
                          lambda: list(six.itervalues(new_sample)),
                          lambda: list(six.itervalues(old_sample)))
  if not isinstance(sample_values, list):
    # `tf.cond` returns tf.Tensor if output is a list of size 1.
    sample_values = [sample_values]

  sample = {z: sample_value for z, sample_value in
            zip(six.iterkeys(new_sample), sample_values)}

  # Update Empirical random variables.
  assign_ops = []
  for z, qz in six.iteritems(self.latent_vars):
    variable = qz.get_variables()[0]
    assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

  # Increment n_accept (if accepted).
  assign_ops.append(self.n_accept.assign_add(tf.where(accept, 1, 0)))
  return tf.group(*assign_ops)
def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden),
                  scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(n_hidden),
                  scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = Laplace(loc=tf.zeros(K),
                  scale=(std**2 / n_hidden) * tf.ones(K))

  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**-.5 * tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, n_hidden]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, n_hidden]))
    W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * D**-.5 * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(n_hidden),
                 scale=10 * n_hidden**(-.5) * tf.ones(n_hidden))
    b_2 = Normal(loc=tf.zeros(K),
                 scale=10 * n_hidden**(-.5) * tf.ones(K))

  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                   loc=tf.zeros([n_hidden, n_hidden]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = StudentT(df=df * tf.ones([K]),
                   loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [None])

  # Use a placeholder for the pre-trained posteriors.
  w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
  w2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b2 = tf.placeholder(tf.float32, [n_samp, K])

  # Empirical distribution
  qW_0 = Empirical(params=tf.Variable(w0))
  qW_1 = Empirical(params=tf.Variable(w1))
  qW_2 = Empirical(params=tf.Variable(w2))
  qb_0 = Empirical(params=tf.Variable(b0))
  qb_1 = Empirical(params=tf.Variable(b1))
  qb_2 = Empirical(params=tf.Variable(b2))

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0,
                        W_1: qW_1, b_1: qb_1,
                        W_2: qW_2, b_2: qb_2}, data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0,
                          W_1: qW_1, b_1: qb_1,
                          W_2: qW_2, b_2: qb_2}, data={y: y_ph})

  # Initialise the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100)

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2, qW_0, qb_0, qW_1, qb_1,
          qW_2, qb_2, inference, w0, w1, w2, b0, b1, b2)
batch = 100  # M = None -> batch size during training
x_ph_bin = tf.placeholder(tf.float32, [M, len(binfeats)], name='x_bin')  # binary inputs
x_ph_cont = tf.placeholder(tf.float32, [M, len(contfeats)], name='x_cont')  # continuous inputs
t_ph = tf.placeholder(tf.float32, [M, 1])
y_ph = tf.placeholder(tf.float32, [M, 1])
beta_holder = tf.placeholder('float32', [1, 1])

x_ph = tf.concat([x_ph_bin, x_ph_cont], 1)
activation = tf.nn.elu

# p(z) -> define prior
z = Normal(loc=tf.zeros([tf.shape(x_ph)[0], d]),
           scale=tf.ones([tf.shape(x_ph)[0], d]))

# *********************** Decoder starts from here ***********************

# p(t|z)
nh, h = 3, 200
logits = fullyConnect_net(z, [h], [[1, None]], 'pt_z', lamba=lamba,
                          activation=activation)
t = Bernoulli(logits=logits, dtype=tf.float32)

# p(y|t,z)
mu2_t0 = fullyConnect_net(z, nh * [h], [[1, None]], 'py_t0z', lamba=lamba,
data, meta = boston_housing(data_dir)
n_data = data.shape[0]
train_idx = np.random.choice(range(n_data), int(n_data * p_train),
                             replace=False)
test_idx = np.setdiff1d(np.arange(n_data), train_idx)
train, test = data[train_idx], data[test_idx]
D, N = train.shape[1] - 1, train.shape[0] // n_batch
batch_generator = generator(train, N)

X_train = tf.placeholder(tf.float32, [N, D])
y_train = tf.placeholder(tf.float32, [N, 1])

# MODEL
with tf.name_scope("model"):
  W_0 = Normal(loc=tf.zeros([D, 10]), scale=tf.ones([D, 10]), name="W_0")
  W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]), name="W_1")
  W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]), name="W_2")
  b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_0")
  b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_1")
  b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2")

  X = tf.placeholder(tf.float32, [N, D], name="X")
  y = Normal(loc=VAFnet(X), scale=0.1 * tf.ones(N), name="y")

# INFERENCE
with tf.name_scope("posterior"):
  with tf.name_scope("qW_0"):
    qW_0 = Normal(loc=tf.Variable(tf.random_normal([D, 10]), name="loc"),
                  scale=tf.nn.softplus(
                      tf.Variable(tf.random_normal([D, 10]), name="scale")))
  X = X.reshape((N, 1))
  return X.astype(np.float32), y.astype(np.float32)


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

# MODEL
X = ed.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run()

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
M = 128  # batch size during training
d = 10  # latent dimension
DATA_DIR = "data/mnist"
IMG_DIR = "img"

if not os.path.exists(DATA_DIR):
  os.makedirs(DATA_DIR)
if not os.path.exists(IMG_DIR):
  os.makedirs(IMG_DIR)

# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
z = Normal(mu=tf.zeros([M, d]), sigma=tf.ones([M, d]))
logits = generative_network(z.value())
x = Bernoulli(logits=logits)

# INFERENCE
x_ph = tf.placeholder(tf.float32, [M, 28 * 28])
mu, sigma = inference_network(x_ph)
qz = Normal(mu=mu, sigma=sigma)

# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

init = tf.initialize_all_variables()
def test_activity_regularization(self):
  x = Normal(loc=tf.zeros([100, 10, 5]), scale=tf.ones([100, 10, 5]))
  y = layers.ActivityRegularization(l1=0.1)(x.value())
ed.set_seed(142)

N = 5000  # number of data points
M = 100  # minibatch size
D = 2  # data dimensionality
K = 1  # latent dimensionality

# DATA
x_train = build_toy_dataset(N, D, K)

# MODEL
w = Normal(mu=tf.zeros([D, K]), sigma=10.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([M, K]), sigma=tf.ones([M, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, M]))

# INFERENCE
qw_variables = [tf.Variable(tf.random_normal([D, K])),
                tf.Variable(tf.random_normal([D, K]))]
qw = Normal(mu=qw_variables[0], sigma=tf.nn.softplus(qw_variables[1]))

qz_variables = [tf.Variable(tf.random_normal([N, K])),
                tf.Variable(tf.random_normal([N, K]))]
ed.set_seed(42)

n_students = 50000
n_questions = 2000
n_obs = 200000

# DATA
data, true_s_etas, true_q_etas = build_toy_dataset(
    n_students, n_questions, n_obs)
obs = data['outcomes'].values
student_ids = data['student_id'].values.astype(int)
question_ids = data['question_id'].values.astype(int)

# MODEL
lnvar_students = Normal(loc=tf.zeros(1), scale=tf.ones(1))
lnvar_questions = Normal(loc=tf.zeros(1), scale=tf.ones(1))

sigma_students = tf.sqrt(tf.exp(lnvar_students))
sigma_questions = tf.sqrt(tf.exp(lnvar_questions))

overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1))

student_etas = Normal(loc=tf.zeros(n_students),
                      scale=sigma_students * tf.ones(n_students))
question_etas = Normal(loc=tf.zeros(n_questions),
                       scale=sigma_questions * tf.ones(n_questions))

observation_logodds = tf.gather(student_etas, student_ids) + \
    tf.gather(question_etas, question_ids) + \
    overall_mu
      [np.linspace(0, 2, num=N / 2), np.linspace(6, 8, num=N / 2)])
  y = np.cos(x) + norm.rvs(0, noise_std, size=N)
  x = (x - 4.0) / 4.0
  x = x.reshape((N, D))
  return x, y


ed.set_seed(42)

x_train, y_train = build_toy_dataset()

model = BayesianNN(layer_sizes=[1, 10, 10, 1], nonlinearity=rbf)

qz_mu = tf.Variable(tf.random_normal([model.n_vars]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([model.n_vars])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

# model.log_lik() is defined so KLqp will do variational inference
# assuming a standard normal prior on the weights; this enables VI
# with an analytic KL term which provides faster inference.
sess = ed.get_session()
data = {'x': x_train, 'y': y_train}
inference = ed.KLqp({'z': qz}, data, model)
inference.initialize(n_print=10)
# import edward and TensorFlow
import edward as ed
import tensorflow as tf
from edward.models import Normal, Uniform, Empirical

# import model and data
from createdata import *

# set the priors
cmin = -10.  # lower range of uniform distribution on c
cmax = 10.   # upper range of uniform distribution on c
cp = Uniform(low=cmin, high=cmax)

mmu = 0.      # mean of Gaussian distribution on m
msigma = 10.  # standard deviation of Gaussian distribution on m
mp = Normal(loc=mmu, scale=msigma)

# set the likelihood containing the model
y = Normal(loc=mp * x + cp, scale=sigma * tf.ones(len(data)))

# set number of samples
Nsamples = 2000  # final number of samples
Ntune = 2000     # number of tuning samples

# set parameters to infer
qm = Empirical(params=tf.Variable(tf.zeros(Nsamples + Ntune)))
qc = Empirical(params=tf.Variable(tf.zeros(Nsamples + Ntune)))

# use Hamiltonian Monte Carlo
inference = ed.HMC({mp: qm, cp: qc}, data={y: data})
inference.run(step_size=1.5e-2)  # higher step sizes can lead to zero acceptance rates
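# A hedged post-processing sketch (not in the original source): drop the
# Ntune tuning draws and summarize the remaining HMC samples for m and c.
sess = ed.get_session()
m_samples = sess.run(qm.params)[Ntune:]
c_samples = sess.run(qc.params)[Ntune:]
print("posterior mean of m: %f" % m_samples.mean())
print("posterior mean of c: %f" % c_samples.mean())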
def neural_network(x, W_0, W_1, b_0, b_1):
  h = tf.tanh(tf.matmul(x, W_0) + b_0)
  h = tf.matmul(h, W_1) + b_1
  return tf.reshape(h, [-1])


ed.set_seed(42)

N = 50  # number of data points
D = 1  # number of features

# DATA
x_train, y_train = build_toy_dataset(N)

# MODEL
W_0 = Normal(mu=tf.zeros([D, 2]), sigma=tf.ones([D, 2]))
W_1 = Normal(mu=tf.zeros([2, 1]), sigma=tf.ones([2, 1]))
b_0 = Normal(mu=tf.zeros(2), sigma=tf.ones(2))
b_1 = Normal(mu=tf.zeros(1), sigma=tf.ones(1))

x = tf.convert_to_tensor(x_train, dtype=tf.float32)
y = Normal(mu=neural_network(x, W_0, W_1, b_0, b_1),
           sigma=0.1 * tf.ones(N))

# INFERENCE
qW_0 = Normal(mu=tf.Variable(tf.random_normal([D, 2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, 2]))))
qW_1 = Normal(mu=tf.Variable(tf.random_normal([2, 1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2, 1]))))
qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2]))))
N = 500  # number of data points
K = 2  # number of components
D = 2  # dimensionality of data
ed.set_seed(42)

# DATA
x_train = build_toy_dataset(N)
plt.scatter(x_train[:, 0], x_train[:, 1])
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

# MODEL
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
cat = Categorical(logits=tf.zeros([N, K]))
components = [
    MultivariateNormalDiag(mu=tf.ones([N, 1]) * tf.gather(mu, k),
                           diag_stdev=tf.ones([N, 1]) * tf.gather(sigma, k))
    for k in range(K)]
x = Mixture(cat=cat, components=components)

# INFERENCE
qmu = Normal(mu=tf.Variable(tf.random_normal([K, D])),
             sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
qsigma = InverseGamma(alpha=tf.nn.softplus(
    tf.Variable(tf.random_normal([K, D]))),
    beta=tf.nn.softplus(tf.Variable(tf.random_normal([K,
D = train_x.shape[1]  # number of features
K = train_y.shape[1]  # number of classes
EPOCH_NUM = 100
batch = 100  # batch size

# for bayesian neural network
train_y2 = np.argmax(train_y, axis=1)
test_y2 = np.argmax(test_y, axis=1)  # index of max

x_ = tf.placeholder(tf.float32, shape=(None, D))
y_ = tf.placeholder(tf.int32, shape=(batch))
keep_prob = tf.placeholder(tf.float32)

# Normal(0,1) priors for the variables.
w = Normal(loc=tf.zeros([D, K]), scale=tf.ones([D, K]))
b = Normal(loc=tf.zeros([K]), scale=tf.ones([K]))

Wx_plus_b = tf.matmul(x_, w) + b
# apply dropout
Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
y_pre = Categorical(Wx_plus_b)

qw = Normal(loc=tf.Variable(tf.random_normal([D, K])),
            scale=tf.Variable(tf.random_normal([D, K])))
qb = Normal(loc=tf.Variable(tf.random_normal([K])),
            scale=tf.Variable(tf.random_normal([K])))

y = Categorical(tf.matmul(x_, qw) + qb)

inference = ed.KLqp({w: qw, b: qb}, data={y_pre: y_})
# inference.initialize()
  X = (X - 4.0) / 4.0
  X = X.reshape((N, D))
  return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

# DATA
X_train, y_train = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=3.0 * tf.ones(D))
b = Normal(mu=tf.zeros([]), sigma=3.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# INFERENCE
T = 5000  # number of samples
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))

inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Normal
from scipy.special import expit

# DATA
nsubj = 200
nitem = 25
trait_true = np.random.normal(size=[nsubj, 1])
thresh_true = np.random.normal(size=[1, nitem])
X_data = np.random.binomial(1, expit(trait_true - thresh_true))

# MODEL
trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1]))
thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem]))
X = Bernoulli(logits=tf.sub(trait, thresh))

# INFERENCE
q_trait = Normal(mu=tf.Variable(tf.random_normal([nsubj, 1])),
                 sigma=tf.nn.softplus(
                     tf.Variable(tf.random_normal([nsubj, 1]))))
q_thresh = Normal(mu=tf.Variable(tf.random_normal([1, nitem])),
                  sigma=tf.nn.softplus(
                      tf.Variable(tf.random_normal([1, nitem]))))

inference = ed.KLqp({trait: q_trait, thresh: q_thresh}, data={X: X_data})
inference.run(n_iter=2500, n_samples=10)

# CRITICISM
  X = np.concatenate(
      [np.linspace(0, 2, num=N / 2), np.linspace(6, 8, num=N / 2)])
  y = 5.0 * X + np.random.normal(0, noise_std, size=N)
  X = X.reshape((N, 1))
  return X, y


ed.set_seed(42)

N = 40  # num data points
D = 1  # num features

# DATA
X_data, y_data = build_toy_dataset(N)

# MODEL
X = tf.cast(X_data, tf.float32)
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

inference = ed.KLqp({w: qw, b: qb}, data={y: y_data})
inference.run()
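# A hedged criticism step in the style of the other examples (not in the
# original snippet): form the posterior predictive by swapping priors for
# the fitted posteriors and evaluate it on the observed responses.
y_post = ed.copy(y, {w: qw, b: qb})
print("Mean squared error on training data:")
print(ed.evaluate('mean_squared_error', data={y_post: y_data}))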
ys = df['index'].values
xs = df[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
         'petal width (cm)']].values

N = xs.shape[0]
D = xs.shape[1]
print("Number of data points: {}".format(N))
print("Number of features: {}".format(D))

X = tf.placeholder(tf.float32, [N, D])
f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=tf.cholesky(rbf(X)))
y = Bernoulli(logits=f)

qf = Normal(loc=tf.get_variable("qf/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))

inference = ed.KLqp({f: qf}, data={X: xs, y: ys})
inference.run(n_iter=5000)
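# A hedged criticism sketch (not in the original notebook): score the fitted
# approximation with a posterior-predictive accuracy estimate on the
# observed labels.
y_post = ed.copy(y, {f: qf})
print("Accuracy on observed data:")
print(ed.evaluate('binary_accuracy', data={X: xs, y_post: ys}))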
N = 5000  # number of data points
D = 2  # data dimensionality
K = 1  # latent dimensionality

# DATA
x_train = build_toy_dataset(N, D, K)
plt.scatter(x_train[0, :], x_train[1, :], color='blue', alpha=0.1)
plt.axis([-10, 10, -10, 10])
plt.title("Simulated data set")
plt.show()

# MODEL
w = Normal(mu=tf.zeros([D, K]), sigma=2.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, N]))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qz = Normal(mu=tf.Variable(tf.random_normal([N, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N, K]))))

inference = ed.KLqp({w: qw, z: qz}, data={x: x_train})
inference.run(n_iter=500, n_print=100, n_samples=10)

# CRITICISM
  os.makedirs(DATA_DIR)
if not os.path.exists(IMG_DIR):
  os.makedirs(IMG_DIR)

ed.set_seed(42)

M = 100  # batch size during training
d = 2  # latent dimension

# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
# Define a subgraph of the full model, corresponding to a minibatch of
# size M.
z = Normal(mu=tf.zeros([M, d]), sigma=tf.ones([M, d]))
hidden = Dense(256, activation='relu')(z)
x = Bernoulli(logits=Dense(28 * 28)(hidden))

# INFERENCE
# Define a subgraph of the variational model, corresponding to a
# minibatch of size M.
x_ph = tf.placeholder(tf.int32, [M, 28 * 28])
hidden = Dense(256, activation='relu')(tf.cast(x_ph, tf.float32))
qz = Normal(mu=Dense(d)(hidden),
            sigma=Dense(d, activation='softplus')(hidden))

# Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
inference = ed.KLqp({z: qz}, data={x: x_ph})
optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer)
class BayesianRegression:
  def __init__(self, in_dim=1, n_classes=2):
    """Bayesian Logistic regression based on Edward lib
    (http://edwardlib.org).

    y = W * x + b

    :param in_dim:
    :param n_classes:
    """
    self.in_dim = in_dim
    self.n_classes = n_classes

    self.X = tf.placeholder(tf.float32, [None, self.in_dim])
    self.W = Normal(loc=tf.zeros([self.in_dim, self.n_classes]),
                    scale=tf.ones([self.in_dim, self.n_classes]))
    self.b = Normal(loc=tf.zeros(self.n_classes),
                    scale=tf.ones(self.n_classes))
    h = tf.matmul(self.X, self.W) + self.b
    self.y = Normal(loc=tf.sigmoid(-h), scale=0.1)

    self.qW = Normal(loc=tf.get_variable("qW/loc",
                                         [self.in_dim, self.n_classes]),
                     scale=tf.nn.softplus(
                         tf.get_variable("qW/scale",
                                         [self.in_dim, self.n_classes])))
    self.qb = Normal(loc=tf.get_variable("qb/loc", [self.n_classes]),
                     scale=tf.nn.softplus(
                         tf.get_variable("qb/scale", [self.n_classes])))

  def infer(self, X, y, n_samples=5, n_iter=250):
    inference = ed.KLqp({self.W: self.qW, self.b: self.qb},
                        data={self.y: y, self.X: X})
    inference.run(n_samples=n_samples, n_iter=n_iter)

  def predict(self, X):
    self.qW_mean = self.qW.mean().eval()
    self.qb_mean = self.qb.mean().eval()
    h = tf.matmul(X, self.qW_mean) + self.qb_mean
    return tf.sigmoid(-h).eval()

  def sample_boudary(self, X):
    qW = self.qW.eval()
    qb = self.qb.eval()
    w = -qW[0][0] / qW[1][0]
    b = (0.5 - qb[0]) / qW[0][0]
    return w, b

  def predict_std(self, X):
    self.qW_stddev = self.qW.stddev().eval()
    self.qb_stddev = self.qb.stddev().eval()
    h = tf.matmul(X, self.qW_stddev) + self.qb_stddev
    return tf.sigmoid(-h).eval()

  def get_coef(self):
    return self.qW.mean().eval().T[0]
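# A minimal usage sketch for the BayesianRegression class above; the toy
# data and dimensions below are illustrative and not part of the original
# source.
import numpy as np
model = BayesianRegression(in_dim=2, n_classes=1)
X_toy = np.random.randn(50, 2).astype(np.float32)
y_toy = (X_toy[:, :1] > 0).astype(np.float32)
model.infer(X_toy, y_toy, n_samples=5, n_iter=100)
print(model.predict(X_toy)[:5])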
  X = X.reshape((N, 1))
  return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run()

# CRITICISM
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Normal, Poisson

ed.set_seed(42)

# DATA
x_train = np.load('data/celegans_brain.npy')

# MODEL
N = x_train.shape[0]  # number of data points
K = 3  # latent dimensionality

z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))

# Calculate N x N distance matrix.
# 1. Create a vector, [||z_1||^2, ||z_2||^2, ..., ||z_N||^2], and tile
# it to create N identical rows.
xp = tf.tile(tf.reduce_sum(tf.pow(z, 2), 1, keep_dims=True), [1, N])
# 2. Create a N x N matrix where entry (i, j) is ||z_i||^2 + ||z_j||^2
# - 2 z_i^T z_j.
xp = xp + tf.transpose(xp) - 2 * tf.matmul(z, z, transpose_b=True)
# 3. Invert the pairwise distances and make rate along diagonals to
# be close to zero.
xp = 1.0 / tf.sqrt(xp + tf.diag(tf.zeros(N) + 1e3))

# Note Edward doesn't currently support sampling for Poisson.
# Hard-code it to 0's for now; it isn't used during inference.
x = Poisson(lam=xp, value=tf.zeros_like(xp))
def test_permute(self):
  x = Normal(loc=tf.zeros([100, 10, 5]), scale=tf.ones([100, 10, 5]))
  y = layers.Permute((2, 1))(x.value())
  with self.test_session():
    self.assertEqual(y.eval().shape, (100, 5, 10))
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Variational, Normal
from edward.stats import multivariate_normal
from edward.util import get_dims


class NormalPosterior:
  """p(x, z) = p(z) = p(z | x) = Normal(z; mu, Sigma)"""
  def __init__(self, mu, Sigma):
    self.mu = mu
    self.Sigma = Sigma
    self.n_vars = get_dims(mu)[0]

  def log_prob(self, xs, zs):
    return multivariate_normal.logpdf(zs, self.mu, self.Sigma)


ed.set_seed(42)
mu = tf.constant([1.0, 1.0])
Sigma = tf.constant([[1.0, 0.1], [0.1, 1.0]])
model = NormalPosterior(mu, Sigma)

variational = Variational()
variational.add(Normal(model.n_vars))

inference = ed.MFVI(model, variational)
inference.run(n_iter=10000)
def test_lambda(self):
  x = Normal(loc=tf.zeros([100, 10, 5]), scale=tf.ones([100, 10, 5]))
  y = layers.Lambda(lambda x: x ** 2)(x.value())
def pred_nn(x, W_0, b_0, W_1, b_1):
  h = tf.nn.softplus(tf.matmul(x, W_0) + b_0)
  o = tf.matmul(h, W_1) + b_1
  return tf.reshape(o, [-1])


# mean squared error
def mse(Y_true, Y_hat):
  sq_err = (Y_true - Y_hat)**2
  return np.mean(sq_err)


# Define prior graph
if str(sys.argv[5]) == 'laplace':
  W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                scale=std**2 / D * tf.ones([D, n_hidden]))
  W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
  b_0 = Laplace(loc=tf.zeros(n_hidden), scale=std**2 / D * tf.ones(n_hidden))
  b_1 = Laplace(loc=tf.zeros(K), scale=std**2 / n_hidden * tf.ones(K))

if str(sys.argv[5]) == 'normal':
  W_0 = Normal(loc=tf.zeros([D, n_hidden]),
               scale=std * D**(-.5) * tf.ones([D, n_hidden]))
  W_1 = Normal(loc=tf.zeros([n_hidden, K]),
               scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
  b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**(-.5) * tf.ones(n_hidden))
  b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

if str(sys.argv[5]) == 'T':
  W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                 loc=tf.zeros([D, n_hidden]),
                 scale=std**2 / D * tf.ones([D, n_hidden]))
  W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                 loc=tf.zeros([n_hidden, K]),
                 scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
  b_0 = StudentT(df=df * tf.ones([n_hidden]),
                 loc=tf.zeros(n_hidden),
                 scale=std**2 / D * tf.ones(n_hidden))
  b_1 = StudentT(df=df * tf.ones([K]),
                 loc=tf.zeros(K),
                 scale=std**2 / n_hidden * tf.ones(K))

# Inputs
x = tf.placeholder(tf.float32, [None, D])
# Gaussian likelihood
y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
           scale=std_out * tf.ones([tf.shape(x)[0]]))
# We use a placeholder for the labels in anticipation of the training data
y_ph = tf.placeholder(tf.float32, [None])
def test_masking(self):
  x = Normal(loc=tf.zeros([100, 10, 5]), scale=tf.ones([100, 10, 5]))
  y = layers.Masking()(x.value())
  os.makedirs(DATA_DIR)
if not os.path.exists(IMG_DIR):
  os.makedirs(IMG_DIR)

ed.set_seed(42)

M = 100  # batch size during training
d = 2  # latent dimension

# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
# Define a subgraph of the full model, corresponding to a minibatch of
# size M.
z = Normal(mu=tf.zeros([M, d]), sigma=tf.ones([M, d]))
hidden = Dense(256, activation='relu')(z.value())
x = Bernoulli(logits=Dense(28 * 28)(hidden))

# INFERENCE
# Define a subgraph of the variational model, corresponding to a
# minibatch of size M.
x_ph = tf.placeholder(tf.float32, [M, 28 * 28])
hidden = Dense(256, activation='relu')(x_ph)
qz = Normal(mu=Dense(d)(hidden),
            sigma=Dense(d, activation='softplus')(hidden))

sess = ed.get_session()
K.set_session(sess)

# Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
inference = ed.KLqp({z: qz}, data={x: x_ph})