def build_update(self): """Simulate Langevin dynamics using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. #### Notes The updates assume each Empirical random variable is directly parameterized by `tf.Variable`s. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} # Simulate Langevin dynamics. learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32) grad_log_joint = tf.gradients(self._log_joint(old_sample), list(six.itervalues(old_sample))) sample = {} for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint): qz = self.latent_vars[z] event_shape = qz.event_shape normal = Normal(loc=tf.zeros(event_shape), scale=learning_rate * tf.ones(event_shape)) sample[z] = old_sample[z] + \ 0.5 * learning_rate * tf.convert_to_tensor(grad_log_p) + \ normal.sample() # Update Empirical random variables. assign_ops = [] for z, qz in six.iteritems(self.latent_vars): variable = qz.get_variables()[0] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def build_update(self): """ Simulate Langevin dynamics using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} # Simulate Langevin dynamics. learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32) grad_log_joint = tf.gradients(self.log_joint(old_sample), list(six.itervalues(old_sample))) sample = {} for z, qz, grad_log_p in \ zip(six.iterkeys(self.latent_vars), six.itervalues(self.latent_vars), grad_log_joint): event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=learning_rate * tf.ones(event_shape)) sample[z] = old_sample[z] + 0.5 * learning_rate * grad_log_p + \ normal.sample() # Update Empirical random variables. assign_ops = [] variables = {x.name: x for x in tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)} for z, qz in six.iteritems(self.latent_vars): variable = variables[qz.params.op.inputs[0].op.inputs[0].name] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def build_update(self): """ Simulate Langevin dynamics using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} # Simulate Langevin dynamics. learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32) grad_log_joint = tf.gradients(self._log_joint(old_sample), list(six.itervalues(old_sample))) sample = {} for z, qz, grad_log_p in \ zip(six.iterkeys(self.latent_vars), six.itervalues(self.latent_vars), grad_log_joint): event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=learning_rate * tf.ones(event_shape)) sample[z] = old_sample[z] + 0.5 * learning_rate * grad_log_p + \ normal.sample() # Update Empirical random variables. assign_ops = [] variables = {x.name: x for x in tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)} for z, qz in six.iteritems(self.latent_vars): variable = variables[qz.params.op.inputs[0].op.inputs[0].name] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def _test(mu, sigma, n):
  rv = Normal(mu=mu, sigma=sigma)
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  mu = mu.eval()
  sigma = sigma.eval()
  assert np.allclose(rv.log_prob(x_tf).eval(),
                     stats.norm.logpdf(x, mu, sigma))
def build_update(self): """Simulate Hamiltonian dynamics using a numerical integrator. Correct for the integrator's discretization error using an acceptance ratio. #### Notes The updates assume each Empirical random variable is directly parameterized by `tf.Variable`s. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} old_sample = OrderedDict(old_sample) # Sample momentum. old_r_sample = OrderedDict() for z, qz in six.iteritems(self.latent_vars): event_shape = qz.event_shape normal = Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape)) old_r_sample[z] = normal.sample() # Simulate Hamiltonian dynamics. new_sample, new_r_sample = leapfrog(old_sample, old_r_sample, self.step_size, self._log_joint, self.n_steps) # Calculate acceptance ratio. ratio = tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r)) for r in six.itervalues(old_r_sample)]) ratio -= tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r)) for r in six.itervalues(new_r_sample)]) ratio += self._log_joint(new_sample) ratio -= self._log_joint(old_sample) # Accept or reject sample. u = Uniform().sample() accept = tf.log(u) < ratio sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)), lambda: list(six.itervalues(old_sample))) if not isinstance(sample_values, list): # `tf.cond` returns tf.Tensor if output is a list of size 1. sample_values = [sample_values] sample = {z: sample_value for z, sample_value in zip(six.iterkeys(new_sample), sample_values)} # Update Empirical random variables. assign_ops = [] for z, qz in six.iteritems(self.latent_vars): variable = qz.get_variables()[0] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept (if accepted). assign_ops.append(self.n_accept.assign_add(tf.where(accept, 1, 0))) return tf.group(*assign_ops)
def build_update(self): """ Simulate Hamiltonian dynamics using a numerical integrator. Correct for the integrator's discretization error using an acceptance ratio. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} # Sample momentum. old_r_sample = {} for z, qz in six.iteritems(self.latent_vars): event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=tf.ones(event_shape)) old_r_sample[z] = normal.sample() # Simulate Hamiltonian dynamics. new_sample = old_sample new_r_sample = old_r_sample for _ in range(self.n_steps): new_sample, new_r_sample = leapfrog(old_sample, old_r_sample, self.step_size, self.log_joint) # Calculate acceptance ratio. ratio = tf.reduce_sum([0.5 * tf.square(r) for r in six.itervalues(old_r_sample)]) ratio -= tf.reduce_sum([0.5 * tf.square(r) for r in six.itervalues(new_r_sample)]) ratio += self.log_joint(new_sample) ratio -= self.log_joint(old_sample) # Accept or reject sample. u = Uniform().sample() accept = tf.log(u) < ratio sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)), lambda: list(six.itervalues(old_sample))) if not isinstance(sample_values, list): # ``tf.cond`` returns tf.Tensor if output is a list of size 1. sample_values = [sample_values] sample = {z: sample_value for z, sample_value in zip(six.iterkeys(new_sample), sample_values)} # Update Empirical random variables. assign_ops = [] variables = {x.name: x for x in tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)} for z, qz in six.iteritems(self.latent_vars): variable = variables[qz.params.op.inputs[0].op.inputs[0].name] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept (if accepted). assign_ops.append(self.n_accept.assign_add(tf.select(accept, 1, 0))) return tf.group(*assign_ops)
def _test(shape, n):
  rv = Normal(shape, loc=tf.zeros(shape), scale=tf.ones(shape))
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  loc = rv.loc.eval()
  scale = rv.scale.eval()
  for idx in range(shape[0]):
    assert np.allclose(
        rv.log_prob_idx((idx, ), x_tf).eval(),
        stats.norm.logpdf(x[:, idx], loc[idx], scale[idx]))
def build_update(self): """ Simulate Hamiltonian dynamics using a numerical integrator. Correct for the integrator's discretization error using an acceptance ratio. """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} # Sample momentum. old_r_sample = {} for z, qz in six.iteritems(self.latent_vars): event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=tf.ones(event_shape)) old_r_sample[z] = normal.sample() # Simulate Hamiltonian dynamics. new_sample = old_sample new_r_sample = old_r_sample for _ in range(self.n_steps): new_sample, new_r_sample = leapfrog(old_sample, old_r_sample, self.step_size, self._log_joint) # Calculate acceptance ratio. ratio = tf.reduce_sum([0.5 * tf.square(r) for r in six.itervalues(old_r_sample)]) ratio -= tf.reduce_sum([0.5 * tf.square(r) for r in six.itervalues(new_r_sample)]) ratio += self._log_joint(new_sample) ratio -= self._log_joint(old_sample) # Accept or reject sample. u = Uniform().sample() accept = tf.log(u) < ratio sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)), lambda: list(six.itervalues(old_sample))) if not isinstance(sample_values, list): # ``tf.cond`` returns tf.Tensor if output is a list of size 1. sample_values = [sample_values] sample = {z: sample_value for z, sample_value in zip(six.iterkeys(new_sample), sample_values)} # Update Empirical random variables. assign_ops = [] variables = {x.name: x for x in tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)} for z, qz in six.iteritems(self.latent_vars): variable = variables[qz.params.op.inputs[0].op.inputs[0].name] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) # Increment n_accept (if accepted). assign_ops.append(self.n_accept.assign_add(tf.select(accept, 1, 0))) return tf.group(*assign_ops)
def build_update(self): """Use Adam to optimize the model and treat each training step as sample from the posterior distribution. Approximate SGLD by adding noise to the updates based on the current adaptive learning rate. Approximate the sampled distribution by diagonal Gaussians which parameters are incrementally updated. Notes ----- The updates assume each Empirical random variable is directly parameterized by ``tf.Variable``s. """ old_sample = {z: qz for z, qz in six.iteritems(self.empirical_vals)} # Calculate Adam updates. opt = tf.train.AdamOptimizer(self.learning_rate) grads = opt.compute_gradients(-1. * self._log_joint(old_sample), list(six.itervalues(old_sample))) train_step = opt.apply_gradients(grads) # Add noise according to current adaptive learning rate noise_step = [] with tf.control_dependencies([train_step]): for z, qz in six.iteritems(self.empirical_vals): lr = (opt._lr_t * tf.sqrt(1. - opt._beta2_power) / (1. - opt._beta1_power)) m = opt.get_slot(qz, "m") v = opt.get_slot(qz, "v") eff_lr = lr * m / (tf.sqrt(v) + opt._epsilon_t) noise_dist = Normal(mu=tf.zeros(tf.shape(qz)), sigma=2. * eff_lr * tf.ones(tf.shape(qz))) noise_add = old_sample[z].assign_add(noise_dist.sample()) noise_step.append(noise_add) # Update Empirical random variables and check whether the Gaussian # approximation should be updated this step with tf.control_dependencies(noise_step): update_approximations = tf.logical_and( tf.greater_equal(self.t, self.burn_in), tf.equal(tf.mod(self.t, self.thinning), 0)) assign_ops = [] assign_ops.append( tf.cond(update_approximations, lambda: self.build_approximation_update(), lambda: tf.no_op())) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def sample():
    # Draw one set of weights and biases from the variational posterior.
    sw0 = qWs[0].sample(1).eval().tolist()[0]
    sw1 = qWs[1].sample(1).eval().tolist()[0]
    sw2 = qWs[2].sample(1).eval().tolist()[0]
    sb0 = qbs[0].sample(1).eval().tolist()[0]
    sb1 = qbs[1].sample(1).eval().tolist()[0]
    sb2 = qbs[2].sample(1).eval().tolist()[0]

    # Evaluate the sampled network on the training inputs.
    X = np.array(X_train, dtype=np.float32)
    h = tf.tanh(tf.matmul(X, sw0) + sb0)
    h = tf.tanh(tf.matmul(h, sw1) + sb1)
    h = tf.matmul(h, sw2) + sb2
    h = tf.reshape(h, [-1])
    y = Normal(loc=h, scale=0.1 * tf.ones(N), name="y")

    sample = y.sample(1).eval()
    print(sample)
def build_update(self): """Simulate Langevin dynamics using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. Approximate the sampled distribution by diagonal Gaussians which parameters are incrementally updated. Notes ----- The updates assume each Empirical random variable is directly parameterized by ``tf.Variable``s. """ old_sample = {z: qz for z, qz in six.iteritems(self.empirical_vals)} # Simulate Langevin dynamics. self.learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32) grad_log_joint = tf.gradients(self._log_joint(old_sample), list(six.itervalues(old_sample))) train_step = [] sample = {} # Build update of Empirical random variables. for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint): qz = self.latent_vars[z] event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=self.learning_rate * tf.ones(event_shape)) sample[z] = old_sample[z] + 0.5 * self.learning_rate * grad_log_p + \ normal.sample() train_step.append(old_sample[z].assign(sample[z])) # Update Empirical random variables and check whether the Gaussian # approximation should be updated this step with tf.control_dependencies(train_step): update_approximations = tf.logical_and( tf.greater_equal(self.t, self.burn_in), tf.equal(tf.mod(self.t, self.thinning), 0)) assign_ops = [] assign_ops.append( tf.cond(update_approximations, lambda: self.build_approximation_update(), lambda: tf.no_op())) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def build_update(self): """ Simulate Hamiltonian dynamics with friction using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. Implements the update equations from (15) of Chen et al. (2014). """ old_sample = { z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars) } old_v_sample = {z: v for z, v in six.iteritems(self.v)} # Simulate Hamiltonian dynamics with friction. friction = tf.constant(self.friction, dtype=tf.float32) learning_rate = tf.constant(self.step_size * 0.01, dtype=tf.float32) grad_log_joint = tf.gradients(self._log_joint(old_sample), list(six.itervalues(old_sample))) # v_sample is so named b/c it represents a velocity rather than momentum. sample = {} v_sample = {} for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint): qz = self.latent_vars[z] event_shape = qz.get_event_shape() normal = Normal(mu=tf.zeros(event_shape), sigma=(tf.sqrt(learning_rate * friction) * tf.ones(event_shape))) sample[z] = old_sample[z] + old_v_sample[z] v_sample[z] = ((1. - 0.5 * friction) * old_v_sample[z] + learning_rate * grad_log_p + normal.sample()) # Update Empirical random variables. assign_ops = [] for z, qz in six.iteritems(self.latent_vars): variable = qz.get_variables()[0] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) assign_ops.append(tf.assign(self.v[z], v_sample[z]).op) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def build_update(self): """Simulate Hamiltonian dynamics with friction using a discretized integrator. Its discretization error goes to zero as the learning rate decreases. Implements the update equations from (15) of Chen et al. (2014). """ old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0)) for z, qz in six.iteritems(self.latent_vars)} old_v_sample = {z: v for z, v in six.iteritems(self.v)} # Simulate Hamiltonian dynamics with friction. friction = tf.constant(self.friction, dtype=tf.float32) learning_rate = tf.constant(self.step_size * 0.01, dtype=tf.float32) grad_log_joint = tf.gradients(self._log_joint(old_sample), list(six.itervalues(old_sample))) # v_sample is so named b/c it represents a velocity rather than momentum. sample = {} v_sample = {} for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint): qz = self.latent_vars[z] event_shape = qz.event_shape normal = Normal(loc=tf.zeros(event_shape), scale=(tf.sqrt(learning_rate * friction) * tf.ones(event_shape))) sample[z] = old_sample[z] + old_v_sample[z] v_sample[z] = ((1. - 0.5 * friction) * old_v_sample[z] + learning_rate * tf.convert_to_tensor(grad_log_p) + normal.sample()) # Update Empirical random variables. assign_ops = [] for z, qz in six.iteritems(self.latent_vars): variable = qz.get_variables()[0] assign_ops.append(tf.scatter_update(variable, self.t, sample[z])) assign_ops.append(tf.assign(self.v[z], v_sample[z]).op) # Increment n_accept. assign_ops.append(self.n_accept.assign_add(1)) return tf.group(*assign_ops)
def predict(self, X, Nsample):
    # Sample weights and biases from the posterior.
    W_post = [self.qW[ii].sample(Nsample).eval()
              for ii in range(len(self.weights))]
    b_post = [self.qb[ii].sample(Nsample).eval()
              for ii in range(len(self.weights))]
    aleatoric_noise = Normal(
        loc=tf.cast(tf.fill([tf.shape(X)[0], 1], 0.0), dtype=self.dtype),
        scale=tf.cast(tf.fill([tf.shape(X)[0], 1], 0.1), dtype=self.dtype))
    noise_post = aleatoric_noise.sample(Nsample).eval()

    prediction = [None for ii in range(Nsample)]
    for ii in range(Nsample):
        # Build the network with the ii-th posterior draw of the parameters
        # and add the corresponding aleatoric noise draw.
        self.weights = [W_post[ww][ii] for ww in range(len(self.weights))]
        self.biases = [b_post[ww][ii] for ww in range(len(self.weights))]
        prediction[ii] = tf.reshape(self.neural_net(X), [-1]) + \
            noise_post[ii].reshape(-1)

    prediction_mean, prediction_var = tf.nn.moments(tf.stack(prediction),
                                                    axes=[0])
    # Convert tensor -> numpy.ndarray.
    prediction_mean = prediction_mean.eval()
    prediction_var = prediction_var.eval()
    return prediction_mean, np.sqrt(prediction_var)
class Distribution(object): def __init__(self, prior=None, name=None, **kwargs): self.prior = prior def init_posterior(self, positive=True, empirical=True, n_samples=1000): if empirical: if positive: self.posterior = Empirical(params=tf.nn.softplus( tf.Variable(tf.random_normal([n_samples])))) else: self.posterior = Empirical( params=tf.Variable(tf.random_normal([ n_samples, ]))) else: if positive: self.posterior = Normal( loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) def plot(self, n_samples=10000, show=True, bins=20): sns.distplot(self.prior.sample(n_samples).eval(), bins=bins, label="prior", hist=False, kde_kws={"shade": True}) sns.distplot(self.posterior.sample(n_samples).eval(), bins=bins, label="posterior", hist=False, kde_kws={"shade": True}) plt.legend() if show: plt.show()
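# A short, hypothetical usage example for the wrapper class above. It
# assumes a TensorFlow/Edward session obtained via ed.get_session(), and
# that Normal, Empirical, seaborn and matplotlib are imported as in the
# class definition; the name sigma_param is illustrative only.
import edward as ed
import tensorflow as tf
from edward.models import Normal

sess = ed.get_session()

# Wrap a prior and attach a positive Empirical posterior of 1000 samples.
sigma_param = Distribution(prior=Normal(loc=0.0, scale=1.0), name="sigma")
sigma_param.init_posterior(positive=True, empirical=True, n_samples=1000)

# The posterior is backed by a tf.Variable, so it must be initialized
# before sampling or plotting.
tf.global_variables_initializer().run()
sigma_param.plot(n_samples=2000, bins=30)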
def _test(shape, loc, scale, n):
  x = Normal(shape, loc, scale)
  val_est = tuple(get_dims(x.sample(n)))
  val_true = (n, ) + shape
  assert val_est == val_true
qW_0 = Normal(mu=tf.Variable(tf.random_normal([D, 2])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, 2])))) qW_1 = Normal(mu=tf.Variable(tf.random_normal([2, 1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2, 1])))) qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2])))) qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) # Sample functions from variational model to visualize fits. rs = np.random.RandomState(0) inputs = np.linspace(-5, 5, num=400, dtype=np.float32) x = tf.expand_dims(tf.constant(inputs), 1) mus = [] for s in range(10): mus += [neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(), qb_1.sample())] mus = tf.stack(mus) # FIRST VISUALIZATION (prior) sess = ed.get_session() tf.global_variables_initializer().run() outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111) ax.set_title("Iteration: 0 - (CLOSE WINDOW TO CONTINUE)") ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)') ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='prior draws')
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) inference.initialize(n_print=10, n_iter=600) tf.global_variables_initializer().run() # Set up figure. fig = plt.figure(figsize=(8, 8), facecolor='white') ax = fig.add_subplot(111, frameon=False) plt.ion() plt.show(block=False) # Build samples from inferred posterior. n_samples = 50 inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1)) probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample()) for _ in range(n_samples)]) for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) if t % inference.n_print == 0: outputs = probs.eval() # Plot data and functions plt.cla() ax.plot(X_train[:], y_train, 'bx') for s in range(n_samples): ax.plot(inputs[:], outputs[s], alpha=0.2)
def __init__(self, hdims, zdim, xdim, gen_scale=1.): x_ph = tf.placeholder(tf.float32, [None, xdim]) batch_size = tf.shape(x_ph)[0] sample_size = tf.placeholder(tf.int32, []) # Define the generative network (p(x | z)) with tf.variable_scope('generative', reuse=tf.AUTO_REUSE): z = Normal(loc=tf.zeros([batch_size, zdim]), scale=tf.ones([batch_size, zdim])) hidden = tf.layers.dense(z, hdims[0], activation=tf.nn.relu, name="dense1") loc = tf.layers.dense(hidden, xdim, name="dense2") x_gen = TransformedDistribution( distribution=tfd.Normal(loc=loc, scale=gen_scale), bijector=tfd.bijectors.Exp(), name="LogNormalTransformedDistribution" ) #x_gen = Bernoulli(logits=loc) # Define the inference network (q(z | x)) with tf.variable_scope('inference', reuse=tf.AUTO_REUSE): hidden = tf.layers.dense(x_ph, hdims[0], activation=tf.nn.relu) qloc = tf.layers.dense(hidden, zdim) qscale = tf.layers.dense(hidden, zdim, activation=tf.nn.softplus) qz = Normal(loc=qloc, scale=qscale) qz_sample = qz.sample(sample_size) # Define the generative network using posterior samples from q(z | x) with tf.variable_scope('generative'): qz_sample = tf.reshape(qz_sample, [-1, zdim]) hidden = tf.layers.dense(qz_sample, hdims[0], activation=tf.nn.relu, reuse=True, name="dense1") loc = tf.layers.dense(hidden, xdim, reuse=True, name="dense2") x_gen_post = tf.exp(loc) self.x_ph = x_ph self.x_data = self.x_ph self.batch_size = batch_size self.sample_size = sample_size self.ops = { 'generative': x_gen, 'inference': qz_sample, 'generative_post': x_gen_post } self.kl_coef = tf.placeholder(tf.float32, ()) with tf.variable_scope('inference', reuse=tf.AUTO_REUSE): self.inference = ed.KLqp({z: qz}, data={x_gen: self.x_data}) self.lr = tf.placeholder(tf.float32, shape=()) optimizer = tf.train.RMSPropOptimizer(self.lr, epsilon=0.9) self.inference.initialize( optimizer=optimizer, n_samples=10, kl_scaling={z: self.kl_coef} ) # Build elbo loss to evaluate on validation data self.eval_loss, _ = self.inference.build_loss_and_gradients([])
inference.print_progress(info_dict)

# ------------------------------ Testing ------------------------------
# Load the test images.
X_test = mnist.test.images
# TensorFlow gives the label data in a one-hot vector format. We convert
# that into a single label.
Y_test = np.argmax(mnist.test.labels, axis=1)

# Generate samples from the posterior and store them.
n_samples = 100
prob_lst = []
samples = []
w_samples = []
b_samples = []
for _ in range(n_samples):
    w_samp = qw.sample()
    b_samp = qb.sample()
    w_samples.append(w_samp)
    b_samples.append(b_samp)
    # Also compute the probability of each class for each (w, b) sample.
    prob = tf.nn.softmax(tf.matmul(X_test, w_samp) + b_samp)
    prob_lst.append(prob.eval())
    sample = tf.concat([tf.reshape(w_samp, [-1]), b_samp], 0)
    samples.append(sample.eval())

# Compute the accuracy of the model.
# For each sample we compute the predicted class and compare with the test
# labels. The predicted class is defined as the one with maximum
# probability. We perform this test for each (w, b) in the posterior,
# giving us a set of accuracies. Finally we make a histogram of accuracies
# for the test data.
accy_test = []
x: np.reshape(X_batch, (N, 32 * 32 * 3)), y_ph: np.reshape(Y_batch, (-1)) }) inference.print_progress(info_dict) # Load the test images. X_test = np.reshape(test_data[0], (-1, 32 * 32 * 3)).astype(np.float32) # TensorFlow method gives the label data in a one hot vetor format. We convert that into a single label. Y_test = np.reshape(test_data[1], (-1)) # Generate samples the posterior and store them. n_samples = 10 prob_lst = [] for i in range(n_samples): w1_samp = qw1.sample() b1_samp = qb1.sample() w2_samp = qw2.sample() b2_samp = qb2.sample() w3_samp = qw3.sample() b3_samp = qb3.sample() # Also compue the probabiliy of each class for each (w,b) sample. l1_samp = tf.nn.leaky_relu(tf.matmul(X_test, w1_samp) + b1_samp) l2_samp = tf.nn.leaky_relu(tf.matmul(l1_samp, w2_samp) + b2_samp) l3_samp = tf.matmul(l2_samp, w3_samp) + b3_samp prob = tf.nn.softmax(l3_samp) prob_lst.append(prob.eval()) print(i + 1, "steps completed.")
inference.run(n_iter=1000)

# Sample functions from variational model to visualize fits.
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(inputs, 1)
mus = tf.stack([
    neural_network(x, qW_0.sample(), qW_1.sample(), qW_2.sample(),
                   qb_0.sample(), qb_1.sample(), qb_2.sample())
    for _ in range(100)
])
outputs = mus.eval()

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.set_title("Iteration: 1000")
ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='posterior draws')
ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5)
ax.set_xlim([-5, 5])
ax.set_ylim([-2, 2])
ax.legend()
qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K]))) qmu_mu = tf.Variable(tf.random_normal([K * D])) qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D]))) qpi = Dirichlet(alpha=qpi_alpha) qmu = Normal(mu=qmu_mu, sigma=qmu_sigma) qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta) data = {'x': x_train} inference = ed.KLqp({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model) inference.run(n_iter=2500, n_samples=10, n_minibatch=20) # Average per-cluster and per-data point likelihood over many posterior samples. log_liks = [] for s in range(100): zrep = {'pi': qpi.sample(()), 'mu': qmu.sample(()), 'sigma': qsigma.sample(())} log_liks += [model.predict(data, zrep)] log_liks = tf.reduce_mean(log_liks, 0) # Choose the cluster with the highest likelihood for each data point. clusters = tf.argmax(log_liks, 0).eval() plt.scatter(x_train[:, 0], x_train[:, 1], c=clusters, cmap=cm.bwr) plt.axis([-3, 3, -3, 3]) plt.title("Predicted cluster assignments") plt.show()
sess = ed.get_session() init = tf.initialize_all_variables() init.run() for _ in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) t = info_dict['t'] if t % inference.n_print == 0: print("Inferred cluster means:") print(sess.run(qmu.value())) # Average per-cluster and per-data point likelihood over many posterior samples. log_liks = [] for _ in range(100): mu_sample = qmu.sample() sigma_sample = qsigma.sample() # Take per-cluster and per-data point likelihood. log_lik = [] for k in range(K): x_post = Normal(mu=tf.ones([N, 1]) * tf.gather(mu_sample, k), sigma=tf.ones([N, 1]) * tf.gather(sigma_sample, k)) log_lik.append(tf.reduce_sum(x_post.log_prob(x_train), 1)) log_lik = tf.pack(log_lik) # has shape (K, N) log_liks.append(log_lik) log_liks = tf.reduce_mean(log_liks, 0) # Choose the cluster with the highest likelihood for each data point. clusters = tf.argmax(log_liks, 0).eval()
def fwd_infer(x): h = tf.nn.relu(ed.dot(x, qW_0.sample()) + qb_0.sample()) h = tf.nn.relu(ed.dot(h, qW_1.sample()) + qb_1.sample()) h = tf.nn.sigmoid(ed.dot(h, qW_2.sample()) + qb_2.sample()) return h # Build samples from inferred posterior. print('start learning') for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) if t % inference.n_print == 0: predict = tf.round( tf.sigmoid(ed.dot(train, qw.sample()) + qb.sample())) # predict = tf.round(fwd_infer(train)) correct_prediction = tf.equal(predict, label[:, 1]) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) val_predict = tf.round( tf.sigmoid(ed.dot(test, qw.sample()) + qb.sample())) # val_predict = tf.round(fwd_infer(test)) val_correct_prediction = tf.equal(val_predict, label[:, 1]) val_accuracy = tf.reduce_mean( tf.cast(val_correct_prediction, tf.float32)) print('\n \n training_accuracy : {}'.format(accuracy.eval())) print(' validation_accuracy : {} \n \n '.format(val_accuracy.eval())) ax.plot(predict.eval())
class bayesian_dynamics_model: def __init__(self, input_size, output_size): self.input_size = input_size self.output_size = output_size self.hidden_size = 20 # Declare placholder. self.x = tf.placeholder(shape=[None, self.input_size], dtype=tf.float32) self.y_ph = tf.placeholder(shape=[None, self.output_size], dtype=tf.float32) # Declare weights. self.W_0 = Normal(loc=tf.zeros([self.input_size, self.hidden_size]), scale=tf.ones([self.input_size, self.hidden_size])) self.W_1 = Normal(loc=tf.zeros([self.hidden_size, self.hidden_size]), scale=tf.ones([self.hidden_size, self.hidden_size])) self.W_2 = Normal(loc=tf.zeros([self.hidden_size, self.output_size]), scale=tf.ones([self.hidden_size, self.output_size])) self.b_0 = Normal(loc=tf.zeros(self.hidden_size), scale=tf.ones(self.hidden_size)) self.b_1 = Normal(loc=tf.zeros(self.hidden_size), scale=tf.ones(self.hidden_size)) self.b_2 = Normal(loc=tf.zeros(self.output_size), scale=tf.ones(self.output_size)) # Output of computational graph. nn_out = self.build(self.x, self.W_0, self.W_1, self.W_2, self.b_0, self.b_1, self.b_2) self.y = Normal(loc=nn_out, scale=tf.ones_like(nn_out) * .1) # Variables. self.qW_0 = Normal(loc=tf.get_variable('qW_0/loc', [self.input_size, self.hidden_size]), scale=tf.nn.softplus(tf.get_variable('qW_0/scale', [self.input_size, self.hidden_size]))) self.qW_1 = Normal(loc=tf.get_variable('qW_1/loc', [self.hidden_size, self.hidden_size]), scale=tf.nn.softplus(tf.get_variable('qW_1/scale', [self.hidden_size, self.hidden_size]))) self.qW_2 = Normal(loc=tf.get_variable('qW_2/loc', [self.hidden_size, self.output_size]), scale=tf.nn.softplus(tf.get_variable('qW_2/scale', [self.hidden_size, self.output_size]))) self.qb_0 = Normal(loc=tf.get_variable('qb_0/loc', [self.hidden_size]), scale=tf.nn.softplus(tf.get_variable('qb_0/scale', [self.hidden_size]))) self.qb_1 = Normal(loc=tf.get_variable('qb_1/loc', [self.hidden_size]), scale=tf.nn.softplus(tf.get_variable('qb_1/scale', [self.hidden_size]))) self.qb_2 = Normal(loc=tf.get_variable('qb_2/loc', [self.output_size]), scale=tf.nn.softplus(tf.get_variable('qb_2/scale', [self.output_size]))) # Sample of the posterior model. self.sample_model = [self.qW_0.sample(), self.qW_1.sample(), self.qW_2.sample(), self.qb_0.sample(), self.qb_1.sample(), self.qb_2.sample()] # Sample functions from variational model to visualize fits. 
self.mus = self.build(self.x, self.qW_0.sample(), self.qW_1.sample(), self.qW_2.sample(), self.qb_0.sample(), self.qb_1.sample(), self.qb_2.sample()) def initialize_inference(self, n_iter=1000*5, n_samples=5): self.inference = ed.KLqp({self.W_0: self.qW_0, self.b_0: self.qb_0, self.W_1: self.qW_1, self.b_1: self.qb_1, self.W_2: self.qW_2, self.b_2: self.qb_2}, data={self.y: self.y_ph}) self.inference.initialize(n_iter=n_iter, n_samples=n_samples) def rbf(self, x): return tf.exp(-tf.square(x)) def function(self, x): return np.sin(x) def build(self, x, W_0, W_1, W_2, b_0, b_1, b_2): '''Builds the computational graph.''' h_0 = self.rbf(tf.matmul(x, W_0) + b_0) h_1 = self.rbf(tf.matmul(h_0, W_1) + b_1) out = tf.matmul(h_1, W_2) + b_2 return out def generate_toy_data(self, noise_sd=.1, size=50): x = np.random.uniform(-3., 3., size) y1 = np.cos(x) + np.random.normal(0, noise_sd, size=size) y2 = np.sin(x) + np.random.normal(0, noise_sd, size=size) y = np.stack([y1, y2], axis=-1) return x[..., np.newaxis], y def get_batch(self, noise_sd=.1, size=50): x = np.random.uniform(-3., 3., size) y = self.function(x) + np.random.normal(0, noise_sd, size=size) return x[..., np.newaxis], y[..., np.newaxis] def visualize(self, sess, xeval, animate=False): plt.cla() plt.scatter(xeval, self.function(xeval)) for _ in range(10): yeval = sess.run(self.mus, feed_dict={self.x:xeval}) plt.plot(xeval, yeval) plt.grid() if animate == False: plt.show() else: plt.pause(1. / 60.)
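# A hypothetical driver for the class defined above, showing how it appears
# intended to be used: build the model, initialize inference, then feed
# mini-batches from `get_batch` into `inference.update`. Iteration counts
# and batch sizes here are arbitrary choices, not from the original code.
import edward as ed
import numpy as np
import tensorflow as tf

model = bayesian_dynamics_model(input_size=1, output_size=1)
model.initialize_inference(n_iter=5000, n_samples=5)

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(model.inference.n_iter):
  x_batch, y_batch = model.get_batch(noise_sd=0.1, size=50)
  info_dict = model.inference.update(
      feed_dict={model.x: x_batch, model.y_ph: y_batch})
  model.inference.print_progress(info_dict)

# Visualize posterior draws against the underlying function.
xeval = np.linspace(-3., 3., 100)[..., np.newaxis].astype(np.float32)
model.visualize(sess, xeval)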
def _test(shape, loc, scale, size):
  x = Normal(shape, loc, scale)
  val_est = tuple(get_dims(x.sample(size=size)))
  val_true = (size, ) + shape
  assert val_est == val_true
init.run() # Set up figure fig = plt.figure(figsize=(8, 8), facecolor='white') ax = fig.add_subplot(111, frameon=False) plt.ion() plt.show(block=False) # draws from approximate posterior S = 50 rs = np.random.RandomState(0) inputs = np.linspace(-5, 3, num=400, dtype=np.float32) x_in = tf.expand_dims(inputs, 1) mus = [] for s in range(S): mus += [tf.sigmoid(ed.dot(x_in, qw.sample()) + qb.sample())] mus = tf.stack(mus) for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) if t % inference.n_print == 0: outputs = mus.eval() # Plot data and functions plt.cla() ax.plot(x_train[:], y_train, 'bx') for s in range(S): ax.plot(inputs, outputs[s], alpha=0.2) ax.set_xlim([-5, 3])
list(np.reshape(sess.run(y_train), (-1, 10))), N) # TensorFlow method gives the label data in a one hot vetor format. We convert that into a single label. Y_batch = np.argmax(Y_batch, axis=1) info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch}) inference.print_progress(info_dict) # Load the test images. X_test = tf.convert_to_tensor(x_test, dtype=tf.float32) # TensorFlow method gives the label data in a one hot vetor format. We convert that into a single label. Y_test = np.argmax(np.reshape(sess.run(y_test), (-1, 10)), axis=1) # Generate samples the posterior and store them. n_samples = 10 prob_lst = [] for i in range(n_samples): w1_samp = tf.convert_to_tensor(qw1.sample(), dtype=tf.float32) b1_samp = qb1.sample() w2_samp = tf.convert_to_tensor(qw2.sample(), dtype=tf.float32) b2_samp = qb2.sample() w3_samp = tf.convert_to_tensor(qw3.sample(), dtype=tf.float32) b3_samp = qb3.sample() # Also compue the probabiliy of each class for each (w,b) sample. prob = tf.nn.softmax( tf.matmul( tf.matmul(tf.matmul(X_test, w1_samp) + b1_samp, w2_samp) + b2_samp, w3_samp) + b3_samp) prob_lst.append(prob.eval()) print(i + 1, "steps completed.") # Compute the accuracy of the model.
qW_0 = Normal(loc=tf.get_variable("qW_0/loc", [D, 2]), scale=tf.nn.softplus(tf.get_variable("qW_0/scale", [D, 2]))) qW_1 = Normal(loc=tf.get_variable("qW_1/loc", [2, 1]), scale=tf.nn.softplus(tf.get_variable("qW_1/scale", [2, 1]))) qb_0 = Normal(loc=tf.get_variable("qb_0/loc", [2]), scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [2]))) qb_1 = Normal(loc=tf.get_variable("qb_1/loc", [1]), scale=tf.nn.softplus(tf.get_variable("qb_1/scale", [1]))) # Sample functions from variational model to visualize fits. rs = np.random.RandomState(0) inputs = np.linspace(-5, 5, num=400, dtype=np.float32) x = tf.expand_dims(inputs, 1) mus = tf.stack( [neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(), qb_1.sample()) for _ in range(10)]) # FIRST VISUALIZATION (prior) sess = ed.get_session() tf.global_variables_initializer().run() outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111) ax.set_title("Iteration: 0") ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
sns.jointplot(qb.params.eval()[nburn:T:stride], qw.params.eval()[nburn:T:stride]) plt.show() # Posterior predictive checks. y_post = ed.copy(y, {w: qw, b: qb}) # This is equivalent to # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_prior_samples): output = inputs * w_prior[ns] + b_prior[ns] plt.plot(inputs, output) plt.show() print("Displaying posterior predictive samples.") n_posterior_samples = 10 w_post = qw.sample(n_posterior_samples).eval()
inference = ed.KLqp({w: qw, b: qb}, data={x: a, y_ph: data.T[1]}) inference.initialize() inference.run(n_samples=2, n_iter=150) y_post = Normal(loc=ed.dot(x, qw) + qb, scale=tf.ones(n_samples)) #y_post = ed.copy(y, {w: qw, b: qb}) #plot results X, Y = data.T[0], data.T[1] plt.plot(X, Y, "bo", label="Real data") s1 = 0.0 s2 = 0.0 n_sample = 10 #print(qw.sample(n_samples)[:, 0].eval(), qb.sample(n_samples).eval()) W_, B_ = qw.sample(n_samples)[:, 0].eval(), qb.sample(n_samples).eval() for x in qw.sample(n_samples)[:, 0].eval(): s1 += x for x in qb.sample(n_samples).eval(): s2 += x w_samples = s1 / n_samples b_samples = s2 / n_samples print("samples", w_samples, b_samples) plt.plot(X, X * W_[0] + B_[0], 'r', label='Predicted data') #plt.plot(X, X * w_samples + b_samples, 'r', label='Predicted data') plt.legend() plt.show()
qw = Normal(mu=tf.Variable(tf.random_normal([1], 0, 1)), sigma=tf.nn.softplus(tf.Variable(1*tf.random_normal([1])))) qb = Normal(mu=tf.Variable(tf.random_normal([1], 0, 1)), sigma=tf.nn.softplus(tf.Variable(1*tf.random_normal([1])))) # Set up data and the inference method to Kullback Leibler z_train = mydf.get("PassengersNorm").reshape([N,1]) x_train = mydf.get("Tempdev").reshape([N,1]) sess = ed.get_session() data = {x: x_train[:,0], z: z_train[:,0]} #inference = ed.KLqp({x_mu: qx_mu, z_mu: qz_mu, w: qw, b: qb}, data) inference = ed.KLqp({w: qw, b: qb}, data) # Set up for samples from models mus = [] for i in range(10): mus += [qw.sample()] mus = tf.stack(mus) # Inference: Quick way - No Priors possible # inference.run() # Inference: More controlled way of inference running inference.initialize(n_print=10, n_iter=600) init = tf.global_variables_initializer() init.run() # Prior samples outputs = mus.eval() priordf=pd.DataFrame(outputs) priordf['Sample']=["Sample"+str(x) for x in list(range(10))]
data = {X: X_train, y: y_train} inference = ed.KLqp({w: qw, b: qb}, data) inference.run() # CRITICISM y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()}) # This is equivalent to # y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400, dtype=np.float32) for ns in range(n_prior_samples): output = inputs * w_prior[ns] + b_prior[ns] plt.plot(inputs, output) plt.show() print("Displaying posterior predictive samples.") n_posterior_samples = 10 w_post = qw.sample(n_posterior_samples).eval()
sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2])))) qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) data = {y: y_train} inference = ed.KLqp({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data) # Sample functions from variational model to visualize fits. rs = np.random.RandomState(0) inputs = np.linspace(-5, 5, num=400, dtype=np.float32) x = tf.expand_dims(tf.constant(inputs), 1) mus = [] for s in range(10): mus += [neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(), qb_1.sample())] mus = tf.pack(mus) sess = ed.get_session() init = tf.initialize_all_variables() init.run() # FIRST VISUALIZATION (prior) outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111)
def getting_started_example(): # Simulate a toy dataset of 50 observations with a cosine relationship. ed.set_seed(42) N = 50 # Number of data points. D = 1 # Number of features. x_train, y_train = build_toy_dataset(N) #-------------------- # Define a two-layer Bayesian neural network. W_0 = Normal(loc=tf.zeros([D, 2]), scale=tf.ones([D, 2])) W_1 = Normal(loc=tf.zeros([2, 1]), scale=tf.ones([2, 1])) b_0 = Normal(loc=tf.zeros(2), scale=tf.ones(2)) b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1)) x = x_train y = Normal(loc=neural_network(x, W_0, W_1, b_0, b_1), scale=0.1 * tf.ones(N)) #-------------------- # Make inferences about the model from data. # We will use variational inference. # Specify a normal approximation over the weights and biases. # Defining tf.get_variable allows the variational factors' parameters to vary. They are initialized randomly. # The standard deviation parameters are constrained to be greater than zero according to a softplus transformation. qW_0 = Normal(loc=tf.get_variable('qW_0/loc', [D, 2]), scale=tf.nn.softplus(tf.get_variable('qW_0/scale', [D, 2]))) qW_1 = Normal(loc=tf.get_variable('qW_1/loc', [2, 1]), scale=tf.nn.softplus(tf.get_variable('qW_1/scale', [2, 1]))) qb_0 = Normal(loc=tf.get_variable('qb_0/loc', [2]), scale=tf.nn.softplus(tf.get_variable('qb_0/scale', [2]))) qb_1 = Normal(loc=tf.get_variable('qb_1/loc', [1]), scale=tf.nn.softplus(tf.get_variable('qb_1/scale', [1]))) # Sample functions from variational model to visualize fits. rs = np.random.RandomState(0) inputs = np.linspace(-5, 5, num=400, dtype=np.float32) x = tf.expand_dims(inputs, 1) mus = tf.stack([ neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(), qb_1.sample()) for _ in range(10) ]) # First Visualization (prior). sess = ed.get_session() tf.global_variables_initializer().run() outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111) ax.set_title('Iteration: 0') ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)') ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='prior draws') ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5) ax.set_xlim([-5, 5]) ax.set_ylim([-2, 2]) ax.legend() plt.show() #-------------------- # Run variational inference with the Kullback-Leibler divergence in order to infer the model's latent variables with the given data. # We specify 1000 iterations. inference = ed.KLqp({ W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1 }, data={y: y_train}) inference.run(n_iter=1000, n_samples=5) #-------------------- # Criticize the model fit. # Bayesian neural networks define a distribution over neural networks, so we can perform a graphical check. # Draw neural networks from the inferred model and visualize how well it fits the data. # SECOND VISUALIZATION (posterior) outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111) ax.set_title('Iteration: 1000') ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)') ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='posterior draws') ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5) ax.set_xlim([-5, 5]) ax.set_ylim([-2, 2]) ax.legend() plt.show()
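# The graphical check at the end of the example above can be complemented by
# a quantitative one. The self-contained sketch below shows the
# ed.copy / ed.evaluate pattern used elsewhere in these examples, on a small
# linear model; the toy data and variable names are illustrative only.
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Normal

N, D = 40, 3
X_train = np.random.randn(N, D).astype(np.float32)
y_train = (X_train.dot(np.ones(D)) +
           0.1 * np.random.randn(N)).astype(np.float32)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

qw = Normal(loc=tf.get_variable("qw/loc", [D]),
            scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
qb = Normal(loc=tf.get_variable("qb/loc", [1]),
            scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(n_iter=500)

# Form the posterior predictive and score it.
y_post = ed.copy(y, {w: qw, b: qb})
print(ed.evaluate('mean_squared_error', data={X: X_train, y_post: y_train}))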
sess = ed.get_session() init = tf.global_variables_initializer() init.run() for _ in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) t = info_dict['t'] if t % inference.n_print == 0: print("Inferred cluster means:") print(sess.run(qmu.mean())) # Calculate likelihood for each data point and cluster assignment, # averaged over many posterior samples. ``x_post`` has shape (N, 100, K, D). mu_sample = qmu.sample(100) sigma_sample = qsigma.sample(100) x_post = Normal(mu=tf.ones([N, 1, 1, 1]) * mu_sample, sigma=tf.ones([N, 1, 1, 1]) * sigma_sample) x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, 100, K, 1]) # Sum over latent dimension, then average over posterior samples. # ``log_liks`` ends up with shape (N, K). log_liks = x_post.log_prob(x_broadcasted) log_liks = tf.reduce_sum(log_liks, 3) log_liks = tf.reduce_mean(log_liks, 1) # Choose the cluster with the highest likelihood for each data point. clusters = tf.argmax(log_liks, 1).eval() plt.scatter(x_train[:, 0], x_train[:, 1], c=clusters, cmap=cm.bwr) plt.axis([-3, 3, -3, 3])
def _test(mu, sigma, n):
  x = Normal(mu=mu, sigma=sigma)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(mu)
  assert val_est == val_true
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train}) inference.run() # CRITICISM y_post = ed.copy(y, {w: qw, b: qb}) # This is equivalent to # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_prior_samples): output = inputs * w_prior[ns] + b_prior[ns] plt.plot(inputs, output) plt.show() print("Displaying posterior predictive samples.") n_posterior_samples = 10 w_post = qw.sample(n_posterior_samples).eval()
tf.Variable(tf.random_normal([10]), name="scale"))) with tf.name_scope("qb_1"): qb_1 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"), scale=tf.nn.softplus( tf.Variable(tf.random_normal([10]), name="scale"))) with tf.name_scope("qb_2"): qb_2 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"), scale=tf.nn.softplus( tf.Variable(tf.random_normal([10]), name="scale"))) # hack to get score/performance x_test = np.vstack([x_test]*6) y_test = np.hstack([y_test]*6) y_sample = tf.stack( [Categorical(logits=neural_network(x, qW_0.sample(),\ qW_1.sample(), qW_2.sample(),\ qb_0.sample(), qb_1.sample(),\ qb_2.sample())) for _ in range(10)]) # build graph inference = ed.KLqp({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2}, data={x: x_train, y: y_train}) # fit model start = time.time() inference.run(n_iter=1000, n_samples=1, logdir='log') end = time.time() print('training wall clock time: %5.2f min' % ((end-start)/60.))
qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2])))) qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) data = {y: y_train} inference = ed.MFVI({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data) # Sample functions from variational model to visualize fits. rs = np.random.RandomState(0) inputs = np.linspace(-5, 5, num=400, dtype=np.float32) x = tf.expand_dims(tf.constant(inputs), 1) mus = [] for s in range(10): mus += [ neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(), qb_1.sample()) ] mus = tf.pack(mus) sess = ed.get_session() init = tf.initialize_all_variables() init.run() # FIRST VISUALIZATION (prior) outputs = mus.eval() fig = plt.figure(figsize=(10, 6)) ax = fig.add_subplot(111)
def main(_): ed.set_seed(42) # DATA X_train, y_train = build_toy_dataset(FLAGS.N) X_test, y_test = build_toy_dataset(FLAGS.N) # MODEL X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N)) # INFERENCE qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1])) inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) inference.run(step_size=1e-3) # CRITICISM # Plot posterior samples. sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride], qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride]) plt.show() # Posterior predictive checks. y_post = ed.copy(y, {w: qw, b: qb}) # This is equivalent to # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_prior_samples): output = inputs * w_prior[ns] + b_prior[ns] plt.plot(inputs, output) plt.show() print("Displaying posterior predictive samples.") n_posterior_samples = 10 w_post = qw.sample(n_posterior_samples).eval() b_post = qb.sample(n_posterior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_posterior_samples): output = inputs * w_post[ns] + b_post[ns] plt.plot(inputs, output) plt.show()