def _initialize_classifier(self):
    with self.sess.as_default():
        with self.sess.graph.as_default():
            if self.trainable_temp:
                temp = tf.Variable(self.temp, name='temp')
            else:
                temp = self.temp

            if self.binary_classifier:
                if self.relaxed:
                    self.encoding_q = RelaxedBernoulli(temp, logits=self.encoder[:, :self.k])
                else:
                    self.encoding_q = Bernoulli(logits=self.encoder[:, :self.k], dtype=self.FLOAT_TF)
                if self.declare_priors:
                    if self.relaxed:
                        self.encoding = RelaxedBernoulli(
                            temp, probs=tf.ones([tf.shape(self.y_bwd)[0], self.k]) * 0.5)
                    else:
                        self.encoding = Bernoulli(
                            probs=tf.ones([tf.shape(self.y_bwd)[0], self.k]) * 0.5,
                            dtype=self.FLOAT_TF)
                    if self.k:
                        self.inference_map[self.encoding] = self.encoding_q
                else:
                    self.encoding = self.encoding_q
            else:
                if self.relaxed:
                    self.encoding_q = RelaxedOneHotCategorical(temp, logits=self.encoder[:, :self.k])
                else:
                    self.encoding_q = OneHotCategorical(logits=self.encoder[:, :self.k], dtype=self.FLOAT_TF)
                if self.declare_priors:
                    if self.relaxed:
                        self.encoding = RelaxedOneHotCategorical(
                            temp, probs=tf.ones([tf.shape(self.y_bwd)[0], self.k]) / self.k)
                    else:
                        self.encoding = OneHotCategorical(
                            probs=tf.ones([tf.shape(self.y_bwd)[0], self.k]) / self.k,
                            dtype=self.FLOAT_TF)
                    if self.k:
                        self.inference_map[self.encoding] = self.encoding_q
                else:
                    self.encoding = self.encoding_q
def _test(p, n):
    rv = Bernoulli(p=p)
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    p = p.eval()
    assert np.allclose(rv.log_prob(x_tf).eval(),
                       stats.bernoulli.logpmf(x, p))
def test_bernoulli(self):
    with self.test_session():
        self._test_sample(Bernoulli(0.5), [], [], [])
        self._test_sample(Bernoulli(tf.zeros([2, 3])), [], [2, 3], [])
        self._test_sample(Bernoulli(0.5, sample_shape=2), [2], [], [])
        self._test_sample(Bernoulli(0.5, sample_shape=[2, 1]), [2, 1], [], [])
def test_metrics_classification(self):
    with self.test_session():
        x = Bernoulli(probs=0.51)
        x_data = tf.constant(1)
        self.assertAllClose(
            1.0, ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))

        x = Bernoulli(probs=0.51, sample_shape=5)
        x_data = tf.constant([1, 1, 1, 0, 0])
        self.assertAllClose(
            0.6, ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))

        x = Bernoulli(probs=tf.constant([0.51, 0.49, 0.49]))
        x_data = tf.constant([1, 0, 1])
        self.assertAllClose(
            2.0 / 3, ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))

        x = Categorical(probs=tf.constant([0.48, 0.51, 0.01]))
        x_data = tf.constant(1)
        self.assertAllClose(
            1.0,
            ed.evaluate('sparse_categorical_accuracy', {x: x_data}, n_samples=1))

        x = Categorical(probs=tf.constant([0.48, 0.51, 0.01]), sample_shape=5)
        x_data = tf.constant([1, 1, 1, 0, 2])
        self.assertAllClose(
            0.6,
            ed.evaluate('sparse_categorical_accuracy', {x: x_data}, n_samples=1))

        x = Categorical(
            probs=tf.constant([[0.48, 0.51, 0.01], [0.51, 0.48, 0.01]]))
        x_data = tf.constant([1, 2])
        self.assertAllClose(
            0.5,
            ed.evaluate('sparse_categorical_accuracy', {x: x_data}, n_samples=1))

        x = Multinomial(total_count=1.0, probs=tf.constant([0.48, 0.51, 0.01]))
        x_data = tf.constant([0, 1, 0], dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            1.0, ed.evaluate('categorical_accuracy', {x: x_data}, n_samples=1))

        x = Multinomial(total_count=1.0, probs=tf.constant([0.48, 0.51, 0.01]),
                        sample_shape=5)
        x_data = tf.constant(
            [[0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]],
            dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            0.6, ed.evaluate('categorical_accuracy', {x: x_data}, n_samples=1))

        x = Multinomial(total_count=5.0, probs=tf.constant([0.4, 0.6, 0.0]))
        x_data = tf.constant([2, 3, 0], dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            1.0, ed.evaluate('multinomial_accuracy', {x: x_data}, n_samples=1))
def _test(shape, n):
    rv = Bernoulli(shape, p=tf.zeros(shape) + 0.5)
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    p = rv.p.eval()
    for idx in range(shape[0]):
        assert np.allclose(rv.log_prob_idx((idx,), x_tf).eval(),
                           stats.bernoulli.logpmf(x[:, idx], p[idx]))
def main(_):
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    # Define a subgraph of the full model, corresponding to a minibatch of
    # size M.
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    x = Bernoulli(logits=tf.layers.dense(hidden, 28 * 28))

    # INFERENCE
    # Define a subgraph of the variational model, corresponding to a
    # minibatch of size M.
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    hidden = tf.layers.dense(tf.cast(x_ph, tf.float32), 256,
                             activation=tf.nn.relu)
    qz = Normal(loc=tf.layers.dense(hidden, FLAGS.d),
                scale=tf.layers.dense(hidden, FLAGS.d,
                                      activation=tf.nn.softplus))

    # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Prior predictive check.
        images = x.eval()
        for m in range(FLAGS.M):
            imsave(os.path.join(FLAGS.out_dir, '%d.png') % m,
                   images[m].reshape(28, 28))
def bernoulli_rv(probs, inference_map, session=None):
    session = get_session(session)
    with session.as_default():
        with session.graph.as_default():
            # Uniform Bernoulli(0.5) prior paired with a Bernoulli(probs) posterior.
            out = Bernoulli(probs=tf.ones_like(probs) * 0.5)
            out_post = Bernoulli(probs=probs)
            inference_map[out] = out_post
            return (out, inference_map)
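# Usage sketch for bernoulli_rv above (hedged: `get_session` is a helper from the
# surrounding codebase, and the probabilities/shape below are made up for the example).
probs = tf.fill([4], 0.3)
inference_map = {}
rv, inference_map = bernoulli_rv(probs, inference_map)
# inference_map now pairs the uniform Bernoulli(0.5) variable with its
# Bernoulli(probs) counterpart, e.g. for passing to an Edward inference
# such as ed.KLqp(inference_map, data=...).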
def __init__(self, prob_zero, underlying, *args, **kwargs):
    self.prob_zero = prob_zero
    self.underlying = underlying
    self.bernoulli = Bernoulli(probs=self.prob_zero)  # for sampling
    super(ZeroInflatedRV, self).__init__(
        *args, **kwargs,
        dtype=underlying.dtype,
        validate_args=underlying.validate_args,
        allow_nan_stats=underlying.allow_nan_stats,
        reparameterization_type=underlying.reparameterization_type)
def __init__(self, d, K):
    self.K = K

    # Data Placeholder
    self.words = tf.placeholder(tf.int32, shape=(d.n_minibatch + d.cs))
    self.placeholders = self.words

    # Index Masks
    self.p_mask = tf.range(d.cs // 2, d.n_minibatch + d.cs // 2)
    rows = tf.tile(tf.expand_dims(tf.range(0, d.cs // 2), [0]),
                   [d.n_minibatch, 1])
    columns = tf.tile(tf.expand_dims(tf.range(0, d.n_minibatch), [1]),
                      [1, d.cs // 2])
    self.ctx_mask = tf.concat(
        1, [rows + columns, rows + columns + d.cs // 2 + 1])

    # Embedding vectors
    self.rho = tf.Variable(tf.random_normal([d.L, self.K]) / self.K)

    # Context vectors
    self.alpha = tf.Variable(tf.random_normal([d.L, self.K]) / self.K)

    # Target words
    self.p_idx = tf.gather(self.words, self.p_mask)
    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

    # Negative samples
    unigram_logits = tf.tile(
        tf.expand_dims(tf.log(tf.constant(d.unigram)), [0]),
        [d.n_minibatch, 1])
    self.n_idx = tf.multinomial(unigram_logits, d.ns)
    self.n_rho = tf.gather(self.rho, self.n_idx)

    # Context
    self.ctx_idx = tf.squeeze(tf.gather(self.words, self.ctx_mask))
    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)
    ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])

    # Natural parameter
    p_eta = tf.expand_dims(tf.reduce_sum(tf.mul(self.p_rho, ctx_sum), -1), 1)
    n_eta = tf.reduce_sum(
        tf.mul(self.n_rho, tf.tile(tf.expand_dims(ctx_sum, 1), [1, d.ns, 1])),
        -1)

    # Conditional likelihood
    self.y_pos = Bernoulli(logits=p_eta)
    self.y_neg = Bernoulli(logits=n_eta)

    # Hallucinated data
    self.data = {
        self.y_pos: tf.ones((d.n_minibatch, 1)),
        self.y_neg: tf.zeros((d.n_minibatch, d.ns))
    }
def __init__(self, Xtrain, ytrain, sess):
    self.Xtrain = Xtrain
    self.ytrain = ytrain
    self.sess = sess
    self.n_samples = 1000  # TODO this is hard coded and must be matched in elbo and fc.

    N, D = Xtrain.shape
    self.w = tf.placeholder(tf.float32, [D, self.n_samples])
    self.X = tf.placeholder(tf.float32, [N, D])
    # self.y = Bernoulli(logits=ed.dot(self.X, self.w))
    self.y = Bernoulli(logits=tf.matmul(self.X, self.w))
    self.prior = Normal(loc=tf.zeros([self.n_samples, D]),
                        scale=1.0 * tf.ones([self.n_samples, D]))  # TODO hard coded
def _test(shape, n):
    # using Bernoulli's internally implemented log_prob_idx() to check
    # Distribution's log_prob()
    rv = Bernoulli(shape, p=tf.zeros(shape) + 0.5)
    rv_sample = rv.sample(n)
    with sess.as_default():
        x = rv_sample.eval()
        x_tf = tf.constant(x, dtype=tf.float32)
        p = rv.p.eval()
        val_ed = rv.log_prob(x_tf).eval()
        val_true = 0.0
        for idx in range(shape[0]):
            val_true += stats.bernoulli.logpmf(x[:, idx], p[idx])
        assert np.allclose(val_ed, val_true)
def _test(shape, n):
    # using Bernoulli's internally implemented log_prob_idx() to check
    # Distribution's log_prob()
    rv = Bernoulli(shape, p=tf.zeros(shape) + 0.5)
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    p = rv.p.eval()
    val_ed = rv.log_prob(x_tf).eval()
    val_true = 0.0
    for idx in range(shape[0]):
        val_true += stats.bernoulli.logpmf(x[:, idx], p[idx])
    assert np.allclose(val_ed, val_true)
def geometric(p):
    i = tf.constant(0)
    sample = tf.while_loop(
        cond=lambda i: tf.cast(1 - Bernoulli(probs=p), tf.bool),
        body=lambda i: i + 1,
        loop_vars=[i])
    return sample
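# Usage sketch for geometric above (hedged: assumes TensorFlow 1.x graph mode and
# edward.models.Bernoulli as elsewhere in this file; the probability 0.3 is an
# illustrative value).
import tensorflow as tf

geom_sample = geometric(0.3)  # number of failed coin flips before the first success
with tf.Session() as sess:
    print(sess.run(geom_sample))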
def run(self, adj_mat, n_iter=1000):
    assert adj_mat.shape[0] == adj_mat.shape[1]
    n_node = adj_mat.shape[0]

    # model
    gamma = Dirichlet(concentration=tf.ones([self.n_cluster]))
    Pi = Beta(concentration0=tf.ones([self.n_cluster, self.n_cluster]),
              concentration1=tf.ones([self.n_cluster, self.n_cluster]))
    Z = Multinomial(total_count=1., probs=gamma, sample_shape=n_node)
    X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

    # inference (point estimation)
    qgamma = PointMass(params=tf.nn.softmax(
        tf.Variable(tf.random_normal([self.n_cluster]))))
    qPi = PointMass(params=tf.nn.sigmoid(
        tf.Variable(tf.random_normal([self.n_cluster, self.n_cluster]))))
    qZ = PointMass(params=tf.nn.softmax(
        tf.Variable(tf.random_normal([n_node, self.n_cluster]))))

    # map estimation
    inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: adj_mat})
    inference.initialize(n_iter=n_iter)

    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)
    inference.finalize()

    return qZ.mean().eval().argmax(axis=1)
def test_hmc_betabernoulli(self):
    """Do we correctly handle dependencies of transformed variables?"""
    with self.test_session() as sess:
        # model
        z = Beta(1., 1., name="z")
        xs = Bernoulli(probs=z, sample_shape=10)
        x_obs = np.asarray([0, 0, 1, 1, 0, 0, 0, 0, 0, 1], dtype=np.int32)

        # inference
        qz_samples = tf.Variable(tf.random_uniform(shape=(1000,)))
        qz = ed.models.Empirical(params=qz_samples, name="z_posterior")
        inference_hmc = ed.inferences.HMC({z: qz}, data={xs: x_obs})
        inference_hmc.run(step_size=1.0, n_steps=5, auto_transform=True)

        # check that inferred posterior mean/variance is close to
        # that of the exact Beta posterior
        z_unconstrained = inference_hmc.transformations[z]
        qz_constrained = z_unconstrained.bijector.inverse(qz_samples)
        qz_mean, qz_var = sess.run(tf.nn.moments(qz_constrained, 0))

        true_posterior = Beta(1. + np.sum(x_obs), 1. + np.sum(1 - x_obs))
        pz_mean, pz_var = sess.run(
            (true_posterior.mean(), true_posterior.variance()))
        self.assertAllClose(qz_mean, pz_mean, rtol=5e-2, atol=5e-2)
        self.assertAllClose(qz_var, pz_var, rtol=1e-2, atol=1e-2)
def define_likelihood(f, y, sigma_noise, compute_link):
    """Define likelihood for binary observations.

    Parameters
    ----------
    f : edward.RandomVariable, shape (N,)
        The Gaussian process prior.
    y : dict
        Mapping from indices corresponding to items to preferences.
    sigma_noise : float
        The standard deviation of observation noise.
    compute_link : func
        The link function.

    Returns
    -------
    d : edward.RandomVariable, shape (N,)
        The observations.
    """
    z = compute_latent(f, y, sigma_noise)
    phi = compute_link(z)
    d = Bernoulli(probs=phi)
    assert d.shape == (len(y),)
    return d
def main(_):
    ed.set_seed(42)

    N = 5000  # number of data points
    D = 10    # number of features

    # DATA
    w_true = np.random.randn(D)
    X_data = np.random.randn(N, D)
    p = expit(np.dot(X_data, w_true))
    y_data = np.array([np.random.binomial(1, i) for i in p])

    # MODEL
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    y = Bernoulli(logits=ed.dot(X, w))

    # INFERENCE
    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))

    inference = IWVI({w: qw}, data={X: X_data, y: y_data})
    inference.run(K=5, n_iter=1000)

    # CRITICISM
    print("Mean squared error in true values to inferred posterior mean:")
    print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def gaussian_process_classification_example():
    ed.set_seed(42)

    data, metadata = crabs('~/data')
    X_train = data[:100, 3:]
    y_train = data[:100, 1]

    N = X_train.shape[0]  # Number of data points.
    D = X_train.shape[1]  # Number of features.

    print('Number of data points: {}'.format(N))
    print('Number of features: {}'.format(D))

    # --------------------
    # Model.
    X = tf.placeholder(tf.float32, [N, D])
    f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=tf.cholesky(rbf(X)))
    y = Bernoulli(logits=f)

    # --------------------
    # Inference.
    # Perform variational inference.
    qf = Normal(loc=tf.get_variable('qf/loc', [N]),
                scale=tf.nn.softplus(tf.get_variable('qf/scale', [N])))

    inference = ed.KLqp({f: qf}, data={X: X_train, y: y_train})
    inference.run(n_iter=5000)
class ZeroInflatedRV(RandomVariable, Distribution):
    """A zero-inflated random variable.

    The prob_zero parameter defines the probability of inflation.
    """

    def __init__(self, prob_zero, underlying, *args, **kwargs):
        self.prob_zero = prob_zero
        self.underlying = underlying
        self.bernoulli = Bernoulli(probs=self.prob_zero)  # for sampling
        super(ZeroInflatedRV, self).__init__(
            *args, **kwargs,
            dtype=underlying.dtype,
            validate_args=underlying.validate_args,
            allow_nan_stats=underlying.allow_nan_stats,
            reparameterization_type=underlying.reparameterization_type)

    def _log_prob(self, value):
        not_zero_lp = self.underlying.log_prob(value)
        # At zero: log(prob_zero + (1 - prob_zero) * p_underlying(0));
        # elsewhere: log(1 - prob_zero) + log p_underlying(value).
        return tf.where(
            tf.equal(value, tf.zeros_like(value)),
            tf.log(self.prob_zero + (1. - self.prob_zero) * tf.exp(not_zero_lp)),
            tf.log(1. - self.prob_zero) + not_zero_lp)

    def _sample_n(self, n, seed=None):
        zero = self.bernoulli.sample(n, seed=seed)
        return tf.where(tf.equal(tf.constant(1), zero),
                        tf.zeros_like(zero, dtype=self.dtype),
                        self.underlying.sample(n))
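# Usage sketch for ZeroInflatedRV (hedged: the Poisson base distribution, its rate,
# and the inflation probability are illustrative choices, not from the original code).
import tensorflow as tf
from edward.models import Poisson

zi = ZeroInflatedRV(prob_zero=0.3, underlying=Poisson(rate=5.0), name="zi_poisson")
log_probs = zi.log_prob(tf.constant([0., 2., 7.]))  # mixes the point mass at 0 with Poisson mass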
class Joint:
    '''
    Wrapper to handle calculating the log p(y, w | X) = log [ p(y | X, w) * p(w) ]
    for a given sample of w.

    Should be the same as the slow version but vectorized and therefore faster.
    '''

    def __init__(self, Xtrain, ytrain, sess):
        self.Xtrain = Xtrain
        self.ytrain = ytrain
        self.sess = sess
        self.n_samples = 1000  # TODO this is hard coded and must be matched in elbo and fc.

        N, D = Xtrain.shape
        self.w = tf.placeholder(tf.float32, [D, self.n_samples])
        self.X = tf.placeholder(tf.float32, [N, D])
        # self.y = Bernoulli(logits=ed.dot(self.X, self.w))
        self.y = Bernoulli(logits=tf.matmul(self.X, self.w))
        self.prior = Normal(loc=tf.zeros([self.n_samples, D]),
                            scale=1.0 * tf.ones([self.n_samples, D]))  # TODO hard coded

    def log_prob(self, samples):
        copied_ytrain = np.repeat(self.ytrain[:, np.newaxis], self.n_samples,
                                  axis=1)
        per_sample = self.sess.run(
            self.y.log_prob(copied_ytrain),
            feed_dict={self.X: self.Xtrain,
                       self.w: samples.T}).astype(np.float32)
        lik = np.sum(per_sample, axis=0)
        prior = np.sum(self.prior.log_prob(samples).eval(), axis=1)
        return lik + prior
def main(_):
    ed.set_seed(42)

    # DATA
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # MODEL
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # COMPLETE CONDITIONAL
    p_cond = ed.complete_conditional(p)

    sess = ed.get_session()

    print('p(probs | x) type:', p_cond.parameters['name'])
    param_vals = sess.run(
        {key: val for key, val in six.iteritems(p_cond.parameters)
         if isinstance(val, tf.Tensor)},
        {x: x_data})
    print('parameters:')
    for key, val in six.iteritems(param_vals):
        print('%s:\t%.3f' % (key, val))
def test_klqp_betabernoulli(self):
    with self.test_session() as sess:
        # model
        z = Beta(1., 1., name="z")
        xs = Bernoulli(probs=z, sample_shape=10)
        x_obs = np.asarray([0, 0, 1, 1, 0, 0, 0, 0, 0, 1], dtype=np.int32)

        # inference
        qz_mean = tf.get_variable("qz_mean", initializer=tf.random_normal(()))
        qz_std = tf.nn.softplus(
            tf.get_variable(name="qz_prestd", initializer=tf.random_normal(())))
        qz_unconstrained = ed.models.Normal(loc=qz_mean, scale=qz_std,
                                            name="z_posterior")

        inference_klqp = ed.inferences.KLqp({z: qz_unconstrained},
                                            data={xs: x_obs})
        inference_klqp.run(n_iter=500, auto_transform=True)

        z_unconstrained = inference_klqp.transformations[z]
        qz_constrained = z_unconstrained.bijector.inverse(
            qz_unconstrained.sample(1000))
        qz_mean, qz_var = sess.run(tf.nn.moments(qz_constrained, 0))

        true_posterior = Beta(np.sum(x_obs) + 1., np.sum(1 - x_obs) + 1.)
        pz_mean, pz_var = sess.run(
            (true_posterior.mean(), true_posterior.variance()))
        self.assertAllClose(qz_mean, pz_mean, rtol=5e-2, atol=5e-2)
        self.assertAllClose(qz_var, pz_var, rtol=1e-2, atol=1e-2)
def main(_):
    ed.set_seed(42)

    # DATA
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # MODEL
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # INFERENCE
    qp = Empirical(params=tf.get_variable(
        "qp/params", [1000], initializer=tf.constant_initializer(0.5)))

    proposal_p = Beta(3.0, 9.0)

    inference = ed.MetropolisHastings({p: qp}, {p: proposal_p},
                                      data={x: x_data})
    inference.run()

    # CRITICISM
    # exact posterior has mean 0.25 and std 0.12
    sess = ed.get_session()
    mean, stddev = sess.run([qp.mean(), qp.stddev()])
    print("Inferred posterior mean:")
    print(mean)
    print("Inferred posterior stddev:")
    print(stddev)

    x_post = ed.copy(x, {p: qp})
    tx_rep, tx = ed.ppc(
        lambda xs, zs: tf.reduce_mean(tf.cast(xs[x_post], tf.float32)),
        data={x_post: x_data})
    ed.ppc_stat_hist_plot(
        tx[0], tx_rep, stat_name=r'$T \equiv$mean', bins=10)
    plt.show()
class Joint:
    '''Wrapper to handle calculating the joint probability of data
    log p(y, w | X) = log [ p(y | X, w) * p(w) ]
    '''

    def __init__(self, X, y, sess, n_samples, logger=None):
        """Initialize the distribution.

        Constructs the graph for evaluation of joint probabilities of
        data X and weights (latent vars) w.

        Args:
            X: [N x D] data
            y: [N] binary target values
            sess: tensorflow session
            n_samples: number of monte carlo samples to compute expectation
        """
        self.sess = sess
        self.n_samples = n_samples
        # (N, ) -> (N, n_samples)
        # np.tile(y[:, np.newaxis], (1, self.n_samples))
        y_matrix = np.repeat(y[:, np.newaxis], self.n_samples, axis=1)
        if logger is not None:
            self.logger = logger

        # Define the model graph
        N, D = X.shape
        self.X = tf.convert_to_tensor(X, dtype=tf.float32)
        self.Y = tf.convert_to_tensor(y_matrix, dtype=tf.float32)
        self.W = tf.get_variable('samples', (self.n_samples, D), tf.float32,
                                 initializer=tf.zeros_initializer())
        # (N, n_samples)
        self.py = Bernoulli(logits=tf.matmul(self.X, tf.transpose(self.W)))
        self.w_prior = Normal(loc=tf.zeros([self.n_samples, D], tf.float32),
                              scale=tf.ones([self.n_samples, D], tf.float32))
        # The prior log probability is summed across the D features:
        # [n_samples, D] -> [n_samples]
        self.prior = tf.reduce_sum(self.w_prior.log_prob(self.W), axis=1)
        log_likelihoods = self.py.log_prob(self.Y)  # (N, n_samples)
        self.ll = tf.reduce_sum(log_likelihoods, axis=0)  # (n_samples, )
        self.joint = self.ll + self.prior

    def log_prob(self, samples):
        """Log probability of samples.

        Since X is already given, `samples` (whether for the target
        distribution, for base distributions in the approximation, or for
        individual atoms) are always samples of w.

        Args:
            samples: [self.n_samples x D] tensor

        Returns:
            [self.n_samples, ] joint log probability of samples, X, y
        """
        assert samples.shape[0] == self.n_samples, 'Different number of samples'
        self.sess.run(self.W.assign(samples))
        return self.joint
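# Usage sketch for the Joint class above (hedged: the synthetic data, shapes, and
# session setup are illustrative, not part of the original code).
import numpy as np
import tensorflow as tf

N, D, S = 50, 3, 10
X_data = np.random.randn(N, D).astype(np.float32)
y_data = np.random.randint(0, 2, size=N)

sess = tf.InteractiveSession()
joint = Joint(X_data, y_data, sess, n_samples=S)
w_samples = np.random.randn(S, D).astype(np.float32)
print(sess.run(joint.log_prob(w_samples)))  # one joint log-probability per sample of w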
def dirichlet_process(alpha, base_cls, sample_n=50, *args, **kwargs):
    """Dirichlet process DP(``alpha``, ``base_cls(*args, **kwargs)``).

    Only works for scalar alpha and scalar base distribution.

    Parameters
    ----------
    alpha : tf.Tensor
        Concentration parameter. Its shape determines the batch shape of the DP.
    base_cls : RandomVariable
        Class of base distribution. Its shape (when instantiated)
        determines the event shape of the DP.
    sample_n : int, optional
        Number of samples for each DP in the batch shape.
    *args, **kwargs : optional
        Arguments passed into ``base_cls``.

    Returns
    -------
    tf.Tensor
        A ``tf.Tensor`` of shape ``[sample_n] + batch_shape + event_shape``,
        where ``sample_n`` is the number of samples for each DP,
        ``batch_shape`` is the number of independent DPs, and
        ``event_shape`` is the shape of the base distribution.
    """
    def cond(k, beta_k, draws, bools):
        # Proceed if at least one bool is True.
        return tf.reduce_any(bools)

    def body(k, beta_k, draws, bools):
        k = k + 1
        beta_k = beta_k * Beta(a=1.0, b=alpha)
        theta_k = base_cls(*args, **kwargs)

        # Assign ongoing samples to the new theta_k.
        indicator = tf.cast(bools, draws.dtype)
        new = indicator * theta_k
        draws = draws * (1.0 - indicator) + new

        flips = tf.cast(Bernoulli(p=beta_k), tf.bool)
        bools = tf.logical_and(flips, tf.equal(draws, theta_k))
        return k, beta_k, draws, bools

    k = 0
    beta_k = Beta(a=tf.ones(sample_n), b=alpha * tf.ones(sample_n))
    theta_k = base_cls(*args, **kwargs)

    # Initialize all samples as theta_k.
    draws = tf.ones(sample_n) * theta_k
    # Flip ``sample_n`` coins, one for each sample.
    flips = tf.cast(Bernoulli(p=beta_k), tf.bool)
    # Get boolean tensor for samples that return heads
    # and are currently equal to theta_k.
    bools = tf.logical_and(flips, tf.equal(draws, theta_k))

    total_sticks, _, samples, _ = tf.while_loop(
        cond, body, loop_vars=[k, beta_k, draws, bools])
    return total_sticks, samples
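# Usage sketch for dirichlet_process above (hedged: this relies on the pre-1.0
# Edward parameter names (a/b, p, mu/sigma) that the function itself uses; the
# concentration value and base distribution are illustrative).
import tensorflow as tf
from edward.models import Normal

total_sticks, dp_samples = dirichlet_process(
    alpha=0.5, base_cls=Normal, sample_n=20, mu=0.0, sigma=1.0)
sess = tf.Session()
print(sess.run([total_sticks, dp_samples]))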
def test_control_flow(self):
    with self.test_session():
        a = Bernoulli(p=0.5)
        b = tf.Variable(0.0)
        c = tf.constant(0.0)
        d = tf.cond(tf.cast(a, tf.bool), lambda: b, lambda: c)
        e = Normal(mu=d, sigma=1.0)
        self.assertEqual(get_variables(d), [b])
        self.assertEqual(get_variables(e), [b])
def lstm_dropout(h, dropout_prob):
    if dropout_prob > 0:
        retain_prob = 1 - dropout_prob
        # Inverted dropout: keep each unit with probability retain_prob,
        # then rescale so the expected activation is unchanged.
        h = tf.multiply(
            h, tf.cast(Bernoulli(probs=retain_prob, sample_shape=h.shape),
                       tf.float32))
        h = h / retain_prob
    return h
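# Usage sketch for lstm_dropout above (hedged: the hidden-state tensor is a
# stand-in for an actual LSTM output, not part of the original code).
h = tf.zeros([32, 128])  # e.g. hidden states for a batch of 32 sequences
h_dropped = lstm_dropout(h, dropout_prob=0.5)  # randomly zeroes units, rescales the rest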
def _test_model_parameter(self, Inference, *args, **kwargs):
    with self.test_session() as sess:
        x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

        p = tf.sigmoid(tf.Variable(0.5))
        x = Bernoulli(probs=p, sample_shape=10)

        inference = Inference({}, data={x: x_data})
        inference.run(*args, **kwargs)

        self.assertAllClose(p.eval(), 0.2, rtol=5e-2, atol=5e-2)
def test_ordering_rv_tensor(self):
    # Check that random variables are copied correctly in dependency
    # structure.
    with self.test_session() as sess:
        ed.set_seed(12432)
        x = Bernoulli(logits=0.0)
        y = tf.cast(x, tf.float32)

        y_new = ed.copy(y)
        x_new = ed.copy(x)

        x_new_val, y_new_val = sess.run([x_new, y_new])
        self.assertEqual(x_new_val, y_new_val)
def test_control_flow(self):
    with self.test_session():
        a = Bernoulli(p=0.5)
        b = Normal(mu=0.0, sigma=1.0)
        c = tf.constant(0.0)
        d = tf.cond(tf.cast(a, tf.bool), lambda: b, lambda: c)
        e = Normal(mu=d, sigma=1.0)
        self.assertEqual(get_siblings(a), [])
        self.assertEqual(get_siblings(b), [])
        self.assertEqual(get_siblings(c), [])
        self.assertEqual(get_siblings(d), [e])
        self.assertEqual(get_siblings(e), [])
def simple_generator(x):
    z = Normal(loc=x, scale=tf.ones([M, D, d]))
    hidden = z.value()
    z1 = hidden
    z2 = tf.transpose(hidden, [0, 2, 1])
    alpha = 0.5
    a = tf.matmul(z1, z2)
    a = tf.reshape(a, [-1, D * D])
    ua = tf.gather(a, tri_idx, axis=1)
    p = tf.sigmoid(ua)
    x = Bernoulli(probs=p)
    return x
def _test(p, n):
    x = Bernoulli(p=p)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(p)
    assert val_est == val_true
import edward as ed
import tensorflow as tf
from edward.models import Bernoulli, Normal
from keras import backend as K
from keras.layers import Dense
from progressbar import ETA, Bar, Percentage, ProgressBar
from scipy.misc import imsave
from tensorflow.examples.tutorials.mnist import input_data

ed.set_seed(42)

M = 100  # batch size during training
d = 2    # latent dimension

# Probability model (subgraph)
z = Normal(mu=tf.zeros([M, d]), sigma=tf.ones([M, d]))
hidden = Dense(256, activation='relu')(z.value())
x = Bernoulli(logits=Dense(28 * 28)(hidden))

# Variational model (subgraph)
x_ph = tf.placeholder(tf.float32, [M, 28 * 28])
hidden = Dense(256, activation='relu')(x_ph)
qz = Normal(mu=Dense(d)(hidden),
            sigma=Dense(d, activation='softplus')(hidden))

# Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
mnist = input_data.read_data_sets("data/mnist", one_hot=True)
data = {x: x_ph}

sess = ed.get_session()
K.set_session(sess)
inference = ed.KLqp({z: qz}, data)
optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
DATA_DIR = "data/mnist"
IMG_DIR = "img"

if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
if not os.path.exists(IMG_DIR):
    os.makedirs(IMG_DIR)

# DATA
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)
x_train, _ = mnist.train.next_batch(N)

# MODEL
z = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
logits = generative_network(z)
x = Bernoulli(logits=logits)

# INFERENCE
T = int(100 * 1000)
qz = Empirical(params=tf.Variable(tf.random_normal([T, N, d])))

inference_e = ed.HMC({z: qz}, data={x: x_train})
inference_e.initialize()

inference_m = ed.MAP(data={x: x_train, z: tf.gather(qz.params, inference_e.t)})
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference_m.initialize(optimizer=optimizer)

init = tf.initialize_all_variables()
init.run()
def _test(self, probs, n):
    rv = Bernoulli(probs)
    dist = ds.Bernoulli(probs)
    x = rv.sample(n).eval()
    self.assertAllEqual(rv.log_prob(x).eval(), dist.log_prob(x).eval())
def _test(shape, p, n):
    x = Bernoulli(shape, p)
    val_est = tuple(get_dims(x.sample(n)))
    val_true = (n,) + shape
    assert val_est == val_true
def _test(shape, p, size):
    x = Bernoulli(shape, p)
    val_est = tuple(get_dims(x.sample(size=size)))
    val_true = (size,) + shape
    assert val_est == val_true