def FC_bayes(x, shape, activation, scope, init=1e-3, bias=True):
    """Initializer for a Bayesian fully-connected layer with TensorFlow.

    Inputs:
      - shape, (tuple): input/output size of the layer
      - activation, (string): activation function to use
      - init, (float): multiplier for random weight initialization
    """
    with tf.variable_scope(scope):
        if init == 'xavier':
            init = np.sqrt(2.0 / (shape[0] + shape[1]))
        factor = np.sqrt(2.0 / shape[0])
        init = np.log(np.exp(factor) - 1)

        # Variational parameters: means and softplus-transformed scales
        W_mu = tf.Variable(tf.zeros(shape), name='W_mu')
        W_sig = tf.Variable(tf.ones(shape) * init, name='W_sig')
        W_sig = tf.log(1.0 + tf.exp(W_sig))
        W_noise = tf.placeholder(shape=shape, dtype=tf.float32, name='W_eps')

        b_mu = tf.Variable(tf.zeros([shape[1]]), name='b_mu')
        b_sig = tf.Variable(tf.ones([shape[1]]) * init, name='b_sig')
        b_sig = tf.log(1.0 + tf.exp(b_sig))
        b_noise = tf.placeholder(shape=shape[1], dtype=tf.float32, name='b_eps')

        # Reparameterized weight and bias samples
        W_samp = W_mu + W_sig * W_noise
        b_samp = b_mu + b_sig * b_noise

        # KL(q(W) || p(W)) + KL(q(b) || p(b)) regularizer
        # reg = tf.log(tf.reduce_prod(W_sig)) + tf.log(tf.reduce_prod(b_sig))
        Norm_w = distributions.Normal(loc=W_mu, scale=W_sig)
        Norm_b = distributions.Normal(loc=b_mu, scale=b_sig)
        N01_w = distributions.Normal(loc=tf.zeros(shape=shape),
                                     scale=tf.ones(shape=shape) * factor)
        N01_b = distributions.Normal(loc=tf.zeros(shape=shape[1]),
                                     scale=tf.ones(shape=shape[1]) * factor)
        reg = tf.reduce_sum(distributions.kl(Norm_w, N01_w)) + \
            tf.reduce_sum(distributions.kl(Norm_b, N01_b))

        if activation == 'relu':
            activation = tf.nn.relu
        elif activation == 'sigmoid':
            activation = tf.nn.sigmoid
        elif activation == 'tanh':
            activation = tf.tanh
        else:
            activation = tf.identity

        if bias:
            h = tf.matmul(x, W_samp) + b_samp
        else:
            h = tf.matmul(x, W_samp)
        a = activation(h)

    return a, W_noise, b_noise, reg
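# A minimal usage sketch for FC_bayes (not from the original snippet).
# Assumptions: TF 1.x with `distributions = tf.contrib.distributions`, and the
# layer sizes and toy loss below are made up for illustration. Fresh N(0, 1)
# noise is fed for W_eps / b_eps at every step, and the returned KL term `reg`
# is added to the training loss.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import distributions

x_in = tf.placeholder(tf.float32, [None, 10])
y, W_eps, b_eps, reg = FC_bayes(x_in, shape=(10, 4), activation='relu',
                                scope='bayes_fc1')
loss = tf.reduce_mean(tf.reduce_sum(y ** 2, axis=1)) + 1e-3 * reg  # toy loss

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {x_in: np.random.randn(8, 10),
            W_eps: np.random.randn(10, 4),  # resample weight noise each step
            b_eps: np.random.randn(4)}      # resample bias noise each step
    print(sess.run([loss, reg], feed))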
def calculate_latent_loss(self, latent_weights):
    """Calculate the latent loss in the form of a KL divergence."""
    for posterior in self.posteriors:
        # NOTE: set allow_nan=True to prevent a CPU-only Assert operation
        kl_divergence = distributions.kl(posterior, self.prior)
        kl_divergence = tf.reduce_sum(latent_weights * kl_divergence, 1,
                                      name='kl_divergence')
        tf.losses.add_loss(tf.reduce_mean(kl_divergence, 0,
                                          name='kl_divergence/avg'))
def loss(self):
    # Recognition prior
    p_z_mu = tf.constant(0.0, dtype=tf.float32)
    p_z_sigma = tf.constant(1.0, dtype=tf.float32)
    p_z = Normal(p_z_mu, p_z_sigma)

    # Loss
    ## Reconstruction error
    log_p_x_given_z = tf.reduce_mean(
        tf.reduce_sum(self.p_x_given_z.log_prob(self.x), axis=1),
        name='reconstruction_error')
    tf.add_to_collection('losses', log_p_x_given_z)

    ## Regularisation
    KL_qp = tf.reduce_mean(
        tf.reduce_sum(kl(self.q_z_given_x, p_z), axis=1),
        name="kl_divergence")
    tf.add_to_collection('losses', KL_qp)

    # Averaging over samples.
    self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
    tf.add_to_collection('losses', self.loss_op)

    # Add scalar summaries for the losses
    for l in tf.get_collection('losses'):
        tf.summary.scalar(l.op.name, l)
def gumbel_reparmeterization(logits_z, tau, rnd_sample=None, hard=True,
                             eps=1e-9):
    '''The Gumbel-softmax reparameterization.'''
    latent_size = logits_z.get_shape().as_list()[1]

    # Prior
    p_z = d.OneHotCategorical(
        probs=tf.constant(1.0 / latent_size, shape=[latent_size]))
    # p_z = d.RelaxedOneHotCategorical(probs=tf.constant(1.0/latent_size,
    #                                                    shape=[latent_size]),
    #                                  temperature=10.0)
    # p_z = 1.0 / latent_size
    # log_p_z = tf.log(p_z + eps)

    with st.value_type(st.SampleValue()):
        q_z = st.StochasticTensor(
            d.RelaxedOneHotCategorical(temperature=tau, logits=logits_z))
        q_z_full = st.StochasticTensor(d.OneHotCategorical(logits=logits_z))

    reduce_index = [1] if len(logits_z.get_shape().as_list()) == 2 else [1, 2]
    kl = d.kl(q_z_full.distribution, p_z, allow_nan_stats=False)
    if len(shp(kl)) > 1:
        return [q_z, tf.reduce_sum(kl, reduce_index)]
    else:
        return [q_z, kl]
def kl_loss(X_true, X_predict):
    latent_prior = dist.MultivariateNormalDiag(
        [0.] * latent_dimensions, [1.] * latent_dimensions)
    approximate_posterior = dist.MultivariateNormalDiag(
        z_mu, K.sqrt(K.exp(z_ls2)))
    # KL(q(z|x) || p(z)): the approximate posterior goes first.
    return {
        'kl_loss': K.mean(dist.kl(approximate_posterior, latent_prior))
    }
def vae_loss(X_true, X_predict):
    # Gaussian negative log-likelihood of the reconstruction
    xent_loss = K.sum(
        0.5 * x_ls2 + (tf.square(x - x_mu) / (2.0 * tf.exp(x_ls2))), 1)
    latent_prior = dist.MultivariateNormalDiag(
        [0.] * latent_dimensions, [1.] * latent_dimensions)
    approximate_posterior = dist.MultivariateNormalDiag(
        z_mu, K.sqrt(K.exp(z_ls2)))
    # KL(q(z|x) || p(z)): the approximate posterior goes first.
    kl_loss = dist.kl(approximate_posterior, latent_prior)
    return xent_loss + kl_loss
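# A small NumPy cross-check (illustrative, not from the code above) of the
# closed-form KL term these VAE losses rely on: for a diagonal Gaussian
# posterior q = N(mu, diag(sigma^2)) and standard normal prior p = N(0, I),
# KL(q || p) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2).
# The names `mu` and `log_var` here are illustrative only.
import numpy as np

def diag_gaussian_kl_to_standard_normal(mu, log_var):
    """Per-example KL(N(mu, diag(exp(log_var))) || N(0, I))."""
    return -0.5 * np.sum(1.0 + log_var - mu ** 2 - np.exp(log_var), axis=-1)

mu = np.array([[0.0, 0.0], [1.0, -1.0]])
log_var = np.zeros_like(mu)  # unit variance
print(diag_gaussian_kl_to_standard_normal(mu, log_var))  # [0., 1.]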
def variational_autoencoder(features, n_latent_dim=2, hidden_units=[500, 500],
                            normalizing_flow='identity', flow_n_iter=2,
                            kl_weight=1.0, random_state=123):
    features = tensor_utils.to_tensor(features, dtype=tf.float32)
    kl_weight = tensor_utils.to_tensor(kl_weight, dtype=tf.float32)
    n_features = tensor_utils.get_shape(features)[1]

    with tf.variable_scope('inference_network'):
        q_mu, q_sigma = ops.gaussian_inference_network(
            x=features, n_latent_dim=n_latent_dim, hidden_units=hidden_units)
        #q_mu, q_chol = ops.mvn_inference_network(x=features,
        #                                         n_latent_dim=n_latent_dim,
        #                                         hidden_units=hidden_units)

    # set up the latent variables
    with tf.variable_scope('latent_samples'):
        with st.value_type(st.SampleValue()):
            q_z = st.StochasticTensor(
                dist=distributions.Normal(mu=q_mu, sigma=q_sigma),
                name='q_z')
            #q_z = st.StochasticTensor(
            #    dist=distributions.MultivariateNormalCholesky(
            #        mu=q_mu, chol=q_chol),
            #    name='q_z')

        # transform the sample to a more complex density by performing
        # a normalizing flow transformation
        norm_flow = flow_lib.get_flow(normalizing_flow,
                                      n_iter=flow_n_iter,
                                      random_state=random_state)
        q_z_trans, log_det_jac = norm_flow.transform(q_z, features=features)

    # set up the priors
    with tf.variable_scope('prior'):
        prior = distributions.Normal(
            mu=np.zeros(n_latent_dim, dtype=np.float32),
            sigma=np.ones(n_latent_dim, dtype=np.float32))

    with tf.variable_scope('generative_network'):
        p_x_given_z = ops.bernoulli_generative_network(
            z=q_z_trans, hidden_units=hidden_units, n_features=n_features)

    # set up the ELBO
    log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(features), 1)
    kl = tf.reduce_sum(distributions.kl(q_z.distribution, prior), 1)
    neg_elbo = -tf.reduce_mean(log_likelihood + log_det_jac - kl_weight * kl, 0)

    return q_mu, tf.identity(neg_elbo, name='neg_elbo')
def build_reparam_kl_loss_and_gradients(inference, var_list):
    """Build loss function. Its automatic differentiation
    is a stochastic gradient of

    .. math::

      -\\text{ELBO} = -( \mathbb{E}_{q(z; \lambda)} [ \log p(x \mid z) ]
            - \\text{KL}(q(z; \lambda) \| p(z)) )

    based on the reparameterization trick (Kingma and Welling, 2014).

    It assumes the KL is analytic.

    Computed by sampling from :math:`q(z;\lambda)` and evaluating the
    expectation using Monte Carlo sampling.
    """
    p_log_lik = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        # Form dictionary in order to replace conditioning on prior or
        # observed variable with conditioning on a specific value.
        scope = 'inference_' + str(id(inference)) + '/' + str(s)
        dict_swap = {}
        for x, qx in six.iteritems(inference.data):
            if isinstance(x, RandomVariable):
                if isinstance(qx, RandomVariable):
                    qx_copy = copy(qx, scope=scope)
                    dict_swap[x] = qx_copy.value()
                else:
                    dict_swap[x] = qx

        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope=scope)
            dict_swap[z] = qz_copy.value()

        for x in six.iterkeys(inference.data):
            if isinstance(x, RandomVariable):
                x_copy = copy(x, dict_swap, scope=scope)
                p_log_lik[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    p_log_lik = tf.stack(p_log_lik)

    kl = tf.reduce_sum([
        inference.kl_scaling.get(z, 1.0) * tf.reduce_sum(ds.kl(qz, z))
        for z, qz in six.iteritems(inference.latent_vars)
    ])

    loss = -(tf.reduce_mean(p_log_lik) - kl)

    grads = tf.gradients(loss, [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
def build_score_kl_loss_and_gradients(inference, var_list):
    """Build loss function and gradients based on the score function
    estimator (Paisley et al., 2012).

    It assumes the KL is analytic.

    Computed by sampling from :math:`q(z;\lambda)` and evaluating the
    expectation using Monte Carlo sampling.
    """
    p_log_lik = [0.0] * inference.n_samples
    q_log_prob = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        # Form dictionary in order to replace conditioning on prior or
        # observed variable with conditioning on a specific value.
        scope = 'inference_' + str(id(inference)) + '/' + str(s)
        dict_swap = {}
        for x, qx in six.iteritems(inference.data):
            if isinstance(x, RandomVariable):
                if isinstance(qx, RandomVariable):
                    qx_copy = copy(qx, scope=scope)
                    dict_swap[x] = qx_copy.value()
                else:
                    dict_swap[x] = qx

        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope=scope)
            dict_swap[z] = qz_copy.value()
            q_log_prob[s] += tf.reduce_sum(
                inference.scale.get(z, 1.0) *
                qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

        for x in six.iterkeys(inference.data):
            if isinstance(x, RandomVariable):
                x_copy = copy(x, dict_swap, scope=scope)
                p_log_lik[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    p_log_lik = tf.stack(p_log_lik)
    q_log_prob = tf.stack(q_log_prob)

    kl = tf.reduce_sum([
        inference.kl_scaling.get(z, 1.0) * tf.reduce_sum(ds.kl(qz, z))
        for z, qz in six.iteritems(inference.latent_vars)
    ])

    loss = -(tf.reduce_mean(p_log_lik) - kl)
    grads = tf.gradients(
        -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl),
        [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
def define_model(self, graph, sample_size=20, samples=1, recognition=None,
                 reuse=None, **kwargs):
    """Define a VariationalAutoencoderModel.

    For more details see Auto-Encoding Variational Bayes:
    https://arxiv.org/pdf/1312.6114v10.pdf

    Args:
        sample_size: The size of the samples from the approximate posterior
        samples: The number of samples drawn from the approximate posterior
        recognition: Model that generates q(z|x). Required keyword argument,
            but it can also be set later on the VariationalAutoencoderModel.
        reuse: Whether to reuse variables

    Returns:
        A VariationalAutoencoderModel
    """
    if recognition is None:
        raise TypeError(
            'define_model() needs keyword only argument recognition')

    with tf.variable_scope('mean', reuse=reuse):
        mean = self.linear_layers(recognition.output_tensor,
                                  (sample_size), reuse=reuse)[-1]
    with tf.variable_scope('log_variance', reuse=reuse):
        log_variance = self.linear_layers(recognition.output_tensor,
                                          (sample_size), reuse=reuse)[-1]

    p_z = distributions.Normal(0.0, 1.0, name='P_z')
    q_z = distributions.Normal(mean, tf.sqrt(tf.exp(log_variance)),
                               name='Q_z')
    posterior = tf.reduce_mean(q_z.sample(samples), 0)
    kl_divergence = tf.reduce_sum(distributions.kl(q_z, p_z), 1)

    return VariationalAutoencoderModel(graph, recognition, posterior,
                                       kl_divergence)
def kl_categorical(p=None, q=None, p_logits=None, q_logits=None, eps=1e-6):
    '''Given p and q (as EITHER both logits or softmax outputs), return the
    KL divergence between them. Uses an eps to avoid divide-by-zero / log(0)
    issues.
    '''
    if p_logits is not None and q_logits is not None:
        Q = distributions.Categorical(logits=q_logits, dtype=tf.float32)
        P = distributions.Categorical(logits=p_logits, dtype=tf.float32)
    elif p is not None and q is not None:
        print 'p shp = ', p.get_shape().as_list(), \
            ' | q shp = ', q.get_shape().as_list()
        Q = distributions.Categorical(probs=q + eps, dtype=tf.float32)
        P = distributions.Categorical(probs=p + eps, dtype=tf.float32)
    else:
        raise Exception("please provide either logits or dists")

    return distributions.kl(P, Q)
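# A minimal NumPy sketch (illustrative, not from the repository above) of the
# quantity kl_categorical computes: KL(P || Q) = sum_i p_i * log(p_i / q_i).
import numpy as np

def categorical_kl(p, q, eps=1e-6):
    """KL divergence between two categorical distributions given as probs."""
    p = np.asarray(p, dtype=np.float64) + eps
    q = np.asarray(q, dtype=np.float64) + eps
    p, q = p / p.sum(), q / q.sum()
    return np.sum(p * np.log(p / q))

print(categorical_kl([0.7, 0.2, 0.1], [0.7, 0.2, 0.1]))  # ~0.0
print(categorical_kl([0.9, 0.05, 0.05], [1.0 / 3] * 3))  # > 0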
def gaussian_reparmeterization(logits_z, rnd_sample=None):
    '''The vanilla Gaussian reparameterization from Kingma et al.:
    z = mu + sigma * N(0, I)
    '''
    zshp = logits_z.get_shape().as_list()
    assert zshp[1] % 2 == 0
    # First half of the logits parameterizes sigma, second half mu
    q_sigma = 1e-6 + tf.nn.softplus(logits_z[:, 0:zshp[1] // 2])
    q_mu = logits_z[:, zshp[1] // 2:]

    # Prior
    p_z = d.Normal(loc=tf.zeros(zshp[1] // 2), scale=tf.ones(zshp[1] // 2))

    with st.value_type(st.SampleValue()):
        q_z = st.StochasticTensor(d.Normal(loc=q_mu, scale=q_sigma))

    reduce_index = [1] if len(zshp) == 2 else [1, 2]
    kl = d.kl(q_z.distribution, p_z, allow_nan_stats=False)
    return [q_z, tf.reduce_sum(kl, reduce_index)]
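# A minimal NumPy sketch (illustrative only) of the reparameterization trick
# that gaussian_reparmeterization implements: a sample from N(mu, sigma^2) is
# written as mu + sigma * eps with eps ~ N(0, I), so gradients can flow
# through mu and sigma.
import numpy as np

rng = np.random.RandomState(0)
mu = np.array([0.5, -1.0])
sigma = np.array([0.1, 2.0])
eps = rng.randn(10000, 2)      # eps ~ N(0, I)
z = mu + sigma * eps           # z ~ N(mu, diag(sigma^2))
print(z.mean(axis=0))          # approximately mu
print(z.std(axis=0))           # approximately sigma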
def network_train():
    with tf.variable_scope('data'):
        x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    with tf.name_scope('variational'):
        q_mu, q_sigma = Encoder(x, latent_dim=FLAGS.latent_dim,
                                hidden_size=FLAGS.hidden_size)
        q_z = distributions.Normal(loc=q_mu, scale=q_sigma)
        assert q_z.reparameterization_type == distributions.FULLY_REPARAMETERIZED

    with tf.variable_scope('model'):
        p_xIz_logits = Decoder(q_z.sample(), hidden_size=FLAGS.hidden_size)
        p_xIz = distributions.Bernoulli(logits=p_xIz_logits)
        posterior_predictive_samples = p_xIz.sample()

    with tf.variable_scope('model', reuse=True):
        p_z = distributions.Normal(
            loc=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            scale=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample(FLAGS.n_samples)
        p_xIz_logits = Decoder(p_z_sample, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_samples = prior_predictive.sample()

    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_xIz_logits = Decoder(z_input, hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_inp_sample = prior_predictive_inp.sample()

    # ELBO = E_q[log p(x|z)] - KL(q(z|x) || p(z))
    kl = tf.reduce_sum(distributions.kl(q_z, p_z), 1)
    e_log_likelihood = tf.reduce_sum(p_xIz.log_prob(x), [1, 2, 3])
    elbo = tf.reduce_sum(e_log_likelihood - kl, 0)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(-elbo)

    init_op = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init_op)

    mnist = read_data_sets(FLAGS.data_dir)
    print('Saving images to: %s' % FLAGS.fig_dir)

    plot_elbo = []
    for i in range(FLAGS.n_episodes):
        batch_x, _ = mnist.train.next_batch(FLAGS.batch_size)
        batch_x = batch_x.reshape(FLAGS.batch_size, 28, 28, 1)
        batch_x = (batch_x > 0.5).astype(np.float32)
        sess.run(optimizer, {x: batch_x})

        batch_elbo = sess.run(elbo, {x: batch_x})
        plot_elbo.append(batch_elbo / float(FLAGS.batch_size))

        if i % 1000 == 0:
            batch_elbo = sess.run(elbo, {x: batch_x})
            print('Episode: {0:d} ELBO: {1: .3f}'.format(
                i, batch_elbo / FLAGS.batch_size))

            batch_posterior_predictive_samples, batch_prior_predictive_samples = sess.run(
                [posterior_predictive_samples, prior_predictive_samples],
                {x: batch_x})

            for k in range(FLAGS.n_samples):
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_data_%d.jpg' % (i, k))
                imsave(f_name, batch_x[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_posterior_%d.jpg' % (i, k))
                imsave(f_name, batch_posterior_predictive_samples[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_prior_%d.jpg' % (i, k))
                imsave(f_name, batch_prior_predictive_samples[k, :, :, 0])

    plt.plot(range(len(plot_elbo)), plot_elbo)
    plt.show()
def _kl(self, utils1, utils2, e1, e2=None):
    e2 = e1 if e2 is None else e2
    dist1 = self._dist(utils1, e1)
    dist2 = self._dist(utils2, e2)
    return tf_dists.kl(dist1, dist2)[..., None]
def train(): # Input placeholders with tf.name_scope('arr'): x = tf.placeholder(tf.float32, [None, input_dim, 1]) # x = tf.placeholder(tf.float32, [None, input_dim,1]) # tf.summary.image('arr', x) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=q_z, hidden_size=FLAGS.hidden_size) # p_x_given_z_normal = generative_network(z=q_z, # hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) posterior_predictive_samples = p_x_given_z.sample() # tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32)) # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.MultivariateNormalDiag( mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.n_samples) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=p_z_sample, hidden_size=FLAGS.hidden_size) # p_x_given_z_normal = generative_network(z=p_z_sample, # hidden_size=FLAGS.hidden_size) prior_predictive = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_samples = prior_predictive.sample() # tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32)) # Take samples from the prior with a placeholder with tf.variable_scope('model', reuse=True): z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=z_input, hidden_size=FLAGS.hidden_size) prior_predictive_inp = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_inp_sample = prior_predictive_inp.sample() ################################################################################################# # for i in range(FLAGS.n_iterations * (n_samples2 // FLAGS.batch_size)): # offset = (i) % (n_samples2 // FLAGS.batch_size) # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1) # # # np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] # # # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1) # # # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32) # Build the evidence lower bound (ELBO) or the negative loss # kl = -0.5*tf.reduce_sum(1 + q_sigma - tf.square(q_mu) - tf.exp(q_sigma), reduction_indices=1) # kl = tf.reduce_sum(distributions.kl(q_z.distribution, p_z), 0) kl = distributions.kl(q_z.distribution, p_z) #Original expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) #expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(x), # [1, 2, 3]) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001) # optimizer = tf.train.AdamOptimizer(learning_rate=0.001) train_op = optimizer.minimize(-elbo) # train_op = optimizer.minimize(elbo) # Merge all the summaries tf.scalar_summary("ELBO", elbo) summary_op = tf.summary.merge_all() init_op = 
tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) # Get fixed MNIST digits for plotting posterior means during training # for i in range(FLAGS.n_iterations*(n_samples2//FLAGS.batch_size)): # offset = (i)%(n_samples2//FLAGS.batch_size) # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1) # # # np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] # # # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1) # # # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32) for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)): offset = (i) % (n_samples // FLAGS.batch_size) # Re-binarize the data at every batch; this improves results # Original #np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1,input_dim, 1) np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1) np_y = training_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, 2, 1) # a = np.argmax(np_y,1) # np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1) # np_x = (np_x > 0.5).astype(np.float32) sess.run(train_op, {x: np_x}) # Print progress and save samples every so often t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'. format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # # Save samples # np_posterior_samples, np_prior_samples = sess.run( # [posterior_predictive_samples, prior_predictive_samples], {x: np_x}) # for k in range(FLAGS.n_samples): # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_posterior_predictive_%d_data.jpg' % (i, k)) # imsave(f_name, np_x[k, :, :, 0]) # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_posterior_predictive_%d_sample.jpg' % (i, k)) # imsave(f_name, np_posterior_samples[k, :, :, 0]) # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_prior_predictive_%d.jpg' % (i, k)) # imsave(f_name, np_prior_samples[k, :, :, 0]) # For Plot using matplotlib if FLAGS.latent_dim == 2: np_q_mu = sess.run(q_mu, {x: np_x}) cmap = plt.get_cmap('jet', 2) # cmap = mpl.colors.ListedColormap(sns.color_palette("husl")) f, ax = plt.subplots(1, figsize=(6 * 1.1618, 6)) im = ax.scatter(np_q_mu[:, 0], np_q_mu[:, 1], c=np.argmax(np_y, 1), cmap=cmap, alpha=0.7) # im = ax.scatter(np_q_mu[:, 0], np_q_mu[:, 1], c=np.argmax(np_y, 1), cmap='RdBu', alpha=0.7) ax.set_xlabel( 'First dimension of sampled latent variable $z_1$') ax.set_ylabel( 'Second dimension of sampled latent variable mean $z_2$') ax.set_xlim([-3, 1]) ax.set_ylim([-3, 1]) f.colorbar(im, ax=ax, label='Patient or not') plt.tight_layout() if i % FLAGS.print_every == 0: plt.savefig( os.path.join( FLAGS.logdir, 'posterior_predictive_map_frame_%d.png' % i)) plt.close()
def train(): # Input placeholders with tf.name_scope('arr'): x = tf.placeholder(tf.float32, [None, input_dim, 1]) # x = tf.placeholder(tf.float32, [None, input_dim,1]) # tf.summary.image('arr', x) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=q_z, hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) posterior_predictive_samples = p_x_given_z.sample() # tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32)) # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.MultivariateNormalDiag( mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.n_samples) p_z_sample2 = p_z.sample_n(n_samples2) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=p_z_sample2, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_samples = prior_predictive.sample() # tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32)) # Take samples from the prior with a placeholder with tf.variable_scope('model', reuse=True): z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=z_input, hidden_size=FLAGS.hidden_size) prior_predictive_inp = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_inp_sample = prior_predictive_inp.sample() ################################################################################################# # for i in range(FLAGS.n_iterations * (n_samples2 // FLAGS.batch_size)): # offset = (i) % (n_samples2 // FLAGS.batch_size) # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1) # # # np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] # # # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1) # # # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32) # Build the evidence lower bound (ELBO) or the negative loss kl = distributions.kl(q_z.distribution, p_z) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001) train_op = optimizer.minimize(-elbo) # train_op = optimizer.minimize(elbo) tf.scalar_summary("ELBO", elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)): offset = (i) % (n_samples // FLAGS.batch_size) # Re-binarize the data at every batch; this improves results # Original #np_x = arr[offset 
* FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1,input_dim, 1) np_x = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1) np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, 2, 1) # np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1) # np_x = (np_x > 0.5).astype(np.float32) sess.run(train_op, {x: np_x}) # Print progress and save samples every so often t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'. format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # Save samples # np_prior_samples = sess.run(prior_predictive_samples, {x: np_x}) # if __name__ == "__main__": print "Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset." print "Running example oz_inputn 2,500 MNIST digits..." a = (n_samples2, FLAGS.latent_dim) np_z = np.zeros(a, dtype=np.float32) print(np_z.shape) X = sess.run(prior_predictive_inp_sample, {z_input: p_z_sample2}) # X = z_input.eval() # X = p_z_sample2.eval() # X = X.reshape(-1, FLAGS.latent_dim) # X= prior_predictive_samples.eval().reshape(-1, input_dim) # X = prior_predictive_inp_sample.eval() print(X.shape) labels = test_label print(labels.shape) Y = tsne.tsne(X, 2, 20, 30.0) np.save('Error.npy', Y) cmap = plt.get_cmap('jet', 2) plt.scatter(Y[:, 0], Y[:, 1], 20, c=np.argmax(labels, 1), cmap=cmap) plt.savefig( os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i)) # plt.scatter(Y[:,0], Y[:,1], 20, c=np.argmax(np_y, 1), cmap=cmap ) plt.close()
def train(): # Input placeholders with tf.name_scope('arr'): x = tf.placeholder(tf.float32, [None, input_dim]) with tf.name_scope('data_for_oneZ'): y = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.latent_dim*2]) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor(distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma)) with tf.variable_scope('variational_2'): separated_mu = y[:, :FLAGS.latent_dim] separated_sigma = y[:, FLAGS.latent_dim:] q_mu_y, q_sigma_y = TimeTrajectory_foroneZ(a=separated_mu,b=separated_sigma) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z_2 = st.StochasticTensor(distributions.MultivariateNormalDiag(mu=q_mu_y, diag_stdev=q_sigma_y)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=q_z, hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) with tf.variable_scope('model_2'): p_x_given_z_mu_2, p_x_given_z_sigma_2 = generative_network(z=q_z_2, hidden_size=FLAGS.hidden_size) p_x_given_z_2 = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu_2, diag_stdev=p_x_given_z_sigma_2) # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.MultivariateNormalDiag(mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.latent_dim*samples_for_data) p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=p_z_sample, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) with tf.variable_scope('model_2', reuse=True): p_z_2 = distributions.MultivariateNormalDiag(mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample_2 = p_z_2.sample_n(FLAGS.latent_dim*samples_for_data) p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=p_z_sample_2, hidden_size=FLAGS.hidden_size) prior_predictive_2 = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_inp_sample = prior_predictive_2.sample() # Take samples from the prior with a placeholder # with tf.variable_scope('model', reuse=True): # z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) # p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input, # hidden_size=FLAGS.hidden_size) # prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma) # # prior_predictive_inp_sample = prior_predictive_inp.sample() ################################################################################################# # Build the evidence lower bound (ELBO) or the negative loss kl = distributions.kl(q_z.distribution, p_z) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001) train_op = optimizer.minimize(-elbo) # train_op = optimizer.minimize(elbo) tf.scalar_summary("ELBO", elbo) # Merge 
all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) for i in range(FLAGS.n_iterations * (samples_for_data // FLAGS.batch_size)): offset = (i) % (samples_for_data // FLAGS.batch_size) # Re-binarize the data at every batch; this improves results # Original # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] # _, q_mu_out, q_sigma_out = sess.run([train_op, q_mu, q_sigma], {x: np_x}) sess.run(train_op, {x: np_x}) t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format(i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / ( time.time() - t0))) t0 = time.time() # curr=[] if i in range((FLAGS.n_iterations-1) *(samples_for_data//FLAGS.batch_size), (FLAGS.n_iterations) *(samples_for_data//FLAGS.batch_size)): z_mu, z_sigma = sess.run([q_mu, q_sigma], {x: np_x}) concat_z_parameters = np.concatenate([z_mu, z_sigma], 1) # Sparsed_Z_mean = sess.run(prior_predictive_inp_sample, {y : concat_z_parameters}) Sparsed_Z_mean = sess.run(prior_predictive_inp_sample, {y: concat_z_parameters}) print(Sparsed_Z_mean.shape) Reshaped_Z_mean= np.reshape(Sparsed_Z_mean,(62,1300,-1)) print(Reshaped_Z_mean.shape)
def train(): # Input placeholders with tf.name_scope('arr'): x = tf.placeholder(tf.float32, [None, input_dim]) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=q_z, hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) posterior_predictive_samples = p_x_given_z.sample() # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.MultivariateNormalDiag( mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.n_samples) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=p_z_sample, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_samples = prior_predictive.sample() # Take samples from the prior with a placeholder # with tf.variable_scope('model', reuse=True): # z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) # p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input, # hidden_size=FLAGS.hidden_size) # prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma) # # prior_predictive_inp_sample = prior_predictive_inp.sample() ################################################################################################# # Build the evidence lower bound (ELBO) or the negative loss kl = distributions.kl(q_z.distribution, p_z) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001) train_op = optimizer.minimize(-elbo) # train_op = optimizer.minimize(elbo) tf.scalar_summary("ELBO", elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)): offset = (i) % (n_samples // FLAGS.batch_size) # Re-binarize the data at every batch; this improves results # Original np_x_tsne = arr # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] # _, q_mu_out, q_sigma_out = sess.run([train_op, q_mu, q_sigma], {x: np_x}) sess.run(train_op, {x: np_x}) t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'. 
format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # # print(range((FLAGS.n_iterations-2) *(n_samples//FLAGS.batch_size), (FLAGS.n_iterations-1) *(n_samples//FLAGS.batch_size))) if i in range( (FLAGS.n_iterations - 1) * (n_samples // FLAGS.batch_size), (FLAGS.n_iterations) * (n_samples // FLAGS.batch_size)): # if offset==0 and i!=0: print "Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset." print "Running example of_input ADNI..." # X = sess.run(q_mu, {x: np_x}) X, q_sigma_out = sess.run([q_mu, q_sigma], {x: np_x}) # X, q_sigma_out = sess.run([q_mu, q_sigma], {x: np_x_tsne}) # np.savetxt('inferenced_z_mu_%d'%i, X) # np.savetxt('inferenced_z_sigma_%d'%i, q_sigma_out) labels_tsne = np.argmax(np_y_fixed, 1) Y = tsne.tsne(X, 2, 20, 20.0) # np.savetxt('tsne_Y_values_%d.txt'% i, Y) # # cmap = plt.get_cmap('bwr') # fig = plt.figure(facecolor="white", figsize=(10.0, 8.0)) plt.xlim(-150.0, 150.0) plt.ylim(-150.0, 150.0) plt.axis("off") if labels_tsne[0] == 1: plt.scatter(Y[:, 0], Y[:, 1], 20, c=labels_tsne, cmap=mpl.colors.ListedColormap('red')) else: plt.scatter(Y[:, 0], Y[:, 1], 20, c=labels_tsne, cmap=mpl.colors.ListedColormap('blue')) plt.savefig( os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i)) # # # ########################################################################################################################################################## # fig = plt.figure(facecolor="white", figsize=(15.0, 10.0)) # scat = plt.scatter(Y[:, 0], Y[:, 1], 20, c = labels_tsne, cmap=mpl.colors.ListedColormap('black')) scat = plt.scatter([], [], c='white') def initiation(): scat.set_offsets([]) return scat, def animate(t): x_ani = Y[:, 0].transpose() y_ani = Y[:, 1].transpose() data_ani = np.hstack( (x_ani[t:, np.newaxis], y_ani[t:, np.newaxis])) # print (data_ani) scat.set_offsets(data_ani) return scat, # # ims = [] # # timepoint = [] # # # # for a in scat(): # # timepoint.append(a) # # ims.append(timepoint) # ani = animation.FuncAnimation(fig, animate, init_func=initiation, frames=FLAGS.batch_size + 17, interval=200, blit=True) # plt.show() Writer = animation.writers['ffmpeg'] writer = Writer( fps=13, metadata=dict( artist='Kang, Eun Song (Korea University MiLab)'), bitrate=1800) # ani.save("test_%d.mov" %i, writer=writer, dpi=300) ani.save(os.path.join(FLAGS.logdir, 'test_%d.mov' % i), writer=writer, dpi=300)
def train(): # Input placeholders with tf.name_scope('arr'): x = tf.placeholder(tf.float32, [None, input_dim]) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=q_z, hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) posterior_predictive_samples = p_x_given_z.sample() # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.MultivariateNormalDiag( mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.n_samples) p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=p_z_sample, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.MultivariateNormalDiag( mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma) prior_predictive_samples = prior_predictive.sample() # Take samples from the prior with a placeholder # with tf.variable_scope('model', reuse=True): # z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) # p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input, # hidden_size=FLAGS.hidden_size) # prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma) # # prior_predictive_inp_sample = prior_predictive_inp.sample() ################################################################################################# # Build the evidence lower bound (ELBO) or the negative loss kl = distributions.kl(q_z.distribution, p_z) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001) train_op = optimizer.minimize(-elbo) # train_op = optimizer.minimize(elbo) tf.scalar_summary("ELBO", elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)): offset = (i) % (n_samples // FLAGS.batch_size) # Re-binarize the data at every batch; this improves results # Original # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim) np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size] sess.run(train_op, {x: np_x}) # sess.run(train_op, {x: np_x_fixed}) t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'. 
format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # print(range((FLAGS.n_iterations-2) *(n_samples//FLAGS.batch_size), (FLAGS.n_iterations-1) *(n_samples//FLAGS.batch_size))) if i in range( (FLAGS.n_iterations - 1) * (n_samples // FLAGS.batch_size), (FLAGS.n_iterations) * (n_samples // FLAGS.batch_size)): # if offset==0 and i!=0:t print "Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset." print "Running example oz_inputn 2,500 MNIST digits..." X = sess.run(q_mu, {x: np_x}) labels_tsne = np.argmax(np_y_fixed, 1) Y = tsne_MDD.tsne(X, 2, 20, 15.0) np.save('Error.npy', Y) # cmap = plt.get_cmap('bwr') plt.xlim(-50.0, 50.0) plt.ylim(-50.0, 50.0) if labels_tsne[0] == 1: plt.scatter(Y[:, 0], Y[:, 1], 20, c=labels_tsne, cmap=mpl.colors.ListedColormap('red')) else: plt.scatter(Y[:, 0], Y[:, 1], 20, c=labels_tsne, cmap=mpl.colors.ListedColormap('blue')) labels_plt = ['{0}'.format(j) for j in range(170)] for label, a, b in zip(labels_plt, Y[:, 0], Y[:, 1]): plt.annotate(label, xy=(a, b), xytext=(-0.07, 0.07), textcoords='offset points', ha='right', va='bottom', arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0')) plt.savefig( os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i)) plt.close()
loss_match = d ** 2.
loss_nomatch = tf.maximum(0., 1. - d) ** 2.
losses = tf.where(match, loss_match, loss_nomatch)
d_loss = tf.reduce_sum(losses)

c_loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=class_hat1, labels=tf.cast(y1, tf.int32))
c_loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=class_hat2, labels=tf.cast(y2, tf.int32))

if VAE:
    # lx1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=rx1, labels=x1), 1)
    # lx2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=rx2, labels=x2), 1)
    lx1 = tf.reduce_sum(kl(Bernoulli(p=x1), Bernoulli(logits=rx1)), 1)
    lx2 = tf.reduce_sum(kl(Bernoulli(p=x2), Bernoulli(logits=rx2)), 1)
    lz1 = tf.reduce_sum(kl(Normal(qmu1, qv1), Normal(0., 1.)), 1)
    lz2 = tf.reduce_sum(kl(Normal(qmu2, qv2), Normal(0., 1.)), 1)
    loss = tf.reduce_sum(d_loss + c_loss1 + c_loss2 + lx1 + lx2 + lz1 + lz2)
else:
    loss = tf.reduce_sum(d_loss + c_loss1 + c_loss2)

# L2 and L1 penalties on the distance network weights
for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='distance'):
    loss += tf.reduce_sum(v ** 2.) * 1e-3
    loss += tf.reduce_sum(tf.abs(v)) * 1e-3
# for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='classify'):
#     loss += tf.reduce_sum(v**2.)*1e-3
#     loss += tf.reduce_sum(tf.abs(v))*1e-3

trainer = tf.train.AdamOptimizer(1e-3).minimize(loss)
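# A small NumPy sketch (illustrative only) of the contrastive distance loss
# used above: matched pairs are pulled together (d^2) and mismatched pairs
# are pushed apart up to a margin of 1 (max(0, 1 - d)^2).
import numpy as np

def contrastive_loss(d, match, margin=1.0):
    d = np.asarray(d, dtype=np.float64)
    match = np.asarray(match, dtype=bool)
    return np.where(match, d ** 2, np.maximum(0.0, margin - d) ** 2)

print(contrastive_loss([0.1, 0.1, 2.0], [True, False, False]))
# [0.01, 0.81, 0.0]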
def kl_divergence(self, prob0, prob1):
    return tf.reduce_sum(
        distributions.kl(prob0, prob1, name='kl_divergence'), axis=1)
def train(): """ Input placeholders""" with tf.name_scope('ROIs'): x = tf.placeholder(tf.float32, [None, input_dim]) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size, layers=FLAGS.hidden_layer, trainornot=FLAGS.train) p_z = distributions.MultivariateNormalDiag( loc=np.zeros(FLAGS.latent_dim, dtype=np.float32), scale_diag=np.ones(FLAGS.latent_dim, dtype=np.float32)) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.MultivariateNormalDiag(loc=q_mu, scale_diag=q_sigma)) with tf.variable_scope('generative'): # The likelihood is Gaussian-distributed with parameter mu given by the generative network p_x_given_z_mu, p_x_given_z_sigma = generative_network( z=q_z, hidden_size=FLAGS.hidden_size, layers=FLAGS.hidden_layer, trainornot=FLAGS.train) p_x_given_z = distributions.MultivariateNormalDiag( loc=p_x_given_z_mu, scale_diag=p_x_given_z_sigma) with tf.variable_scope('generative', reuse=True): z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) p_x_given_z_mu_2, p_x_given_z_sigma_2 = generative_network( z=z_input, hidden_size=FLAGS.hidden_size, layers=FLAGS.hidden_layer, trainornot=FLAGS.train) p_x_given_z_2 = distributions.MultivariateNormalDiag( loc=p_x_given_z_mu_2, scale_diag=p_x_given_z_sigma) prior_predictive = p_x_given_z_2.copy() prior_predictive_inp_sample = prior_predictive.sample() # Build the evidence lower bound (ELBO) or the negative loss # For no regularization term # kl = distributions.kl(q_z.distribution, p_z) # expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) # elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) kl = distributions.kl(q_z.distribution, p_z) reg_variables = slim.losses.get_regularization_losses() reg_variables_sum = tf.reduce_sum(reg_variables) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1) reg_expected_log_likelihood = expected_log_likelihood + reg_variables_sum elbo = tf.reduce_sum(reg_expected_log_likelihood - kl, 0) # Optimization optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001) train_op = optimizer.minimize(-elbo) tf.summary.scalar("ELBO", elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) randidx = np.random.permutation( np.arange(samples_for_data, dtype=np.uint32)) saver = tf.train.Saver() #Batchsize cur_epoch = 0 for i in range( (FLAGS.n_iterations * samples_for_data) // FLAGS.batch_size): offset = (i) % (samples_for_data // FLAGS.batch_size) np_x = arr[randidx[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size]].reshape(-1, input_dim).copy() sess.run(train_op, {x: np_x}) t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) if cur_epoch != int((i * FLAGS.batch_size) / samples_for_data): # print("Saved in path", saver.save(sess, os.path.join(FLAGS.logdir, "%02d.ckpt" % (cur_epoch)))) randidx = np.random.permutation(samples_for_data) cur_epoch = int((i * 
FLAGS.batch_size) / samples_for_data) t0 = time.time() saver.save(sess, os.path.join(FLAGS.logdir, 'savedmodel_final.ckpt'))
# # Imports
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import tensorflow as tf
import tensorflow.contrib.distributions as dis
from keras import backend as K
from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import metrics
from keras.datasets import mnist
from scipy.stats import norm

# # Variational autoencoder (VAE)
Normal = tf.contrib.distributions.Normal
t = dis.kl(Normal(3.0, 2.0), Normal(0.0, 1.0))  # arguments are (mean, st dev)
t2 = dis.kl(Normal(3.0, 1.0), Normal(2.9, 1.0))
t3 = dis.kl(Normal(3.0, 1.0), Normal(3.0, 1.0))

with tf.Session() as session:
    t_val = session.run(t)
    # Closed form: KL(N(mu, s^2) || N(0, 1)) = 0.5 * (s^2 + mu^2 - 1 - ln(s^2))
    print('KLD(N(3,2), N(0,1)) =', t_val,
          ", value = ", 0.5 * (2.0 ** 2 + 3.0 ** 2 - 1.0 - np.log(2.0 ** 2)))
    t_val = session.run(t2)
    print('KLD(N(3,1), N(2.9,1)) =', t_val)
    t_val = session.run(t3)
    print('KLD(N(3,1), N(3,1)) =', t_val)

# # Implementing the variational autoencoder
# hyper parameters
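# A NumPy cross-check (not from the original script) of the general
# closed form for univariate Gaussians, usable against the values above:
# KL(N(mu1, s1^2) || N(mu2, s2^2))
#   = log(s2/s1) + (s1^2 + (mu1 - mu2)^2) / (2 * s2^2) - 0.5
import numpy as np

def gaussian_kl(mu1, s1, mu2, s2):
    return np.log(s2 / s1) + (s1 ** 2 + (mu1 - mu2) ** 2) / (2 * s2 ** 2) - 0.5

print(gaussian_kl(3.0, 2.0, 0.0, 1.0))  # should match KLD(N(3,2), N(0,1)) above
print(gaussian_kl(3.0, 1.0, 3.0, 1.0))  # 0.0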
def _elbo(form, log_likelihood, log_joint, variational_with_prior, keep_batch_dim): """Internal implementation of ELBO. Users should use `elbo`. Args: form: ELBOForms constant. Controls how the ELBO is computed. log_likelihood: `Tensor` log p(x|Z). log_joint: `Tensor` log p(x, Z). variational_with_prior: `dict<DistributionTensor, Distribution>`, varational distributions to prior distributions. keep_batch_dim: bool. Whether to keep the batch dimension when reducing the entropy/KL. Returns: ELBO `Tensor` with same shape and dtype as `log_likelihood`/`log_joint`. """ ELBOForms.check_form(form) # Order of preference # 1. Analytic KL: log_likelihood - KL(q||p) # 2. Analytic entropy: log_likelihood + log p(Z) + H[q], or log_joint + H[q] # 3. Sample: log_likelihood - (log q(Z) - log p(Z)) = # log_likelihood + log p(Z) - log q(Z), or log_joint - q(Z) def _reduce(val): if keep_batch_dim: return val else: return math_ops.reduce_sum(val) kl_terms = [] entropy_terms = [] prior_terms = [] for q, z, p in [(qz.distribution, qz.value(), pz) for qz, pz in variational_with_prior.items()]: # Analytic KL kl = None if log_joint is None and form in {ELBOForms.default, ELBOForms.analytic_kl}: try: kl = distributions.kl(q, p) logging.info("Using analytic KL between q:%s, p:%s", q, p) except NotImplementedError as e: if form == ELBOForms.analytic_kl: raise e if kl is not None: kl_terms.append(-1. * _reduce(kl)) continue # Analytic entropy entropy = None if form in {ELBOForms.default, ELBOForms.analytic_entropy}: try: entropy = q.entropy() logging.info("Using analytic entropy for q:%s", q) except NotImplementedError as e: if form == ELBOForms.analytic_entropy: raise e if entropy is not None: entropy_terms.append(_reduce(entropy)) if log_likelihood is not None: prior = p.log_prob(z) prior_terms.append(_reduce(prior)) continue # Sample if form in {ELBOForms.default, ELBOForms.sample}: entropy = -q.log_prob(z) entropy_terms.append(_reduce(entropy)) if log_likelihood is not None: prior = p.log_prob(z) prior_terms.append(_reduce(prior)) first_term = log_joint if log_joint is not None else log_likelihood return sum([first_term] + kl_terms + entropy_terms + prior_terms)
def _elbo(form, log_likelihood, log_joint, variational_with_prior, keep_batch_dim): """Internal implementation of ELBO. Users should use `elbo`. Args: form: ELBOForms constant. Controls how the ELBO is computed. log_likelihood: `Tensor` log p(x|Z). log_joint: `Tensor` log p(x, Z). variational_with_prior: `dict<StochasticTensor, Distribution>`, varational distributions to prior distributions. keep_batch_dim: bool. Whether to keep the batch dimension when reducing the entropy/KL. Returns: ELBO `Tensor` with same shape and dtype as `log_likelihood`/`log_joint`. """ ELBOForms.check_form(form) # Order of preference # 1. Analytic KL: log_likelihood - KL(q||p) # 2. Analytic entropy: log_likelihood + log p(Z) + H[q], or log_joint + H[q] # 3. Sample: log_likelihood - (log q(Z) - log p(Z)) = # log_likelihood + log p(Z) - log q(Z), or log_joint - q(Z) def _reduce(val): if keep_batch_dim: return val else: return math_ops.reduce_sum(val) kl_terms = [] entropy_terms = [] prior_terms = [] for q, z, p in [(qz.distribution, qz.value(), pz) for qz, pz in variational_with_prior.items()]: # Analytic KL kl = None if log_joint is None and form in { ELBOForms.default, ELBOForms.analytic_kl }: try: kl = distributions.kl(q, p) logging.info("Using analytic KL between q:%s, p:%s", q, p) except NotImplementedError as e: if form == ELBOForms.analytic_kl: raise e if kl is not None: kl_terms.append(-1. * _reduce(kl)) continue # Analytic entropy entropy = None if form in {ELBOForms.default, ELBOForms.analytic_entropy}: try: entropy = q.entropy() logging.info("Using analytic entropy for q:%s", q) except NotImplementedError as e: if form == ELBOForms.analytic_entropy: raise e if entropy is not None: entropy_terms.append(_reduce(entropy)) if log_likelihood is not None: prior = p.log_prob(z) prior_terms.append(_reduce(prior)) continue # Sample if form in {ELBOForms.default, ELBOForms.sample}: entropy = -q.log_prob(z) entropy_terms.append(_reduce(entropy)) if log_likelihood is not None: prior = p.log_prob(z) prior_terms.append(_reduce(prior)) first_term = log_joint if log_joint is not None else log_likelihood return sum([first_term] + kl_terms + entropy_terms + prior_terms)
def _create_vlb(self): if FLAGS.dynamics == False: # When there is no transition dynamics, the loss is composed of two terms: # 1.) The reconstruction loss (the negative log probability # of the input under the reconstructed Gaussian distribution # induced by the decoder in the data space). # Adding 1e-10 to avoid evaluation of log(0.0) # Q_phi = distributions.MultivariateNormalDiag(self.x_recons_mean, tf.sqrt(tf.exp(self.x_recons_logsigma_sq))) self.log_prob_reconst = -0.5 * ( self.input_x.get_shape()[1].value * 2 * np.pi + tf.reduce_sum(tf.exp(self.x_recons_logsigma_sq), axis=1) + tf.reduce_sum(tf.square(self.input_x - self.x_recons_mean) / tf.exp(self.x_recons_logsigma_sq), axis=1)) + 1e-5 # For numerical stability recon_loss = -self.log_prob_reconst reconstr_loss = \ -tf.reduce_sum(self.input_x * tf.log(1e-9 + self.x_recons_mean) + (1 - self.input_x) * tf.log(1e-9 + 1 - self.x_recons_mean), 1) # recon_loss = tf.reduce_sum(-tf.log(tf.reduce_sum(Q_phi.prob(self.input_x)))) # recon_loss = -tf.reduce_sum(Q_phi.prob(tf.reshape(self.input_x, [-1, FLAGS.input_dim]))) # 2.) The latent loss, which is defined as the Kullback Leibler divergence ## between the distribution in latent space induced by the encoder on # the data and some prior. This acts as a kind of regularizer. # This can be interpreted as the number of "nats" required # for transmitting the the latent space distribution given # the prior. latent_loss = -0.5 * tf.reduce_sum( 1 + self.z_sample_logsigma_sq * 2 - tf.square(self.z_sample_mean) - tf.square(tf.exp(self.z_sample_logsigma_sq)), 1) self.cost = tf.reduce_mean(reconstr_loss + latent_loss) # average over batch else: # When there is no transition dynamics, the loss is composed of two to Four terms: #See "Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images" by Manuel Watter, Martin Riedmiller et al. for more details #See "Stable Reinforcement Learning with Autoencoders for Tactile and Visual Data" by Herke Van Hoof, Patric Van Der Smagt, Jan Peters et al. for more details # 1.) The reconstruction loss of the state at the current time stamp (the negative log probability # of the input under the reconstructed Gaussian distribution # induced by the decoder in the data space). # Adding 1e-10 to avoid evaluation of log(0.0) # Q_eps = distributions.MultivariateNormalDiag(self.x_recons_mean, tf.sqrt(tf.exp(self.x_recons_logsigma_sq))) self.log_prob_reconst = -0.5 * ( self.input_x.get_shape()[1].value * 2 * np.pi + tf.reduce_sum(tf.exp(self.x_recons_logsigma_sq), axis=1) + tf.reduce_sum(tf.square(self.input_x - self.x_recons_mean) / tf.exp(self.x_recons_logsigma_sq), axis=1)) + 1e-5 # For numerical stability recon_loss = -self.log_prob_reconst reconstr_loss = \ -tf.reduce_sum(self.input_x * tf.log(1e-5 + self.x_recons_mean) + (1 - self.input_x) * tf.log(1e-5 + 1 - self.x_recons_mean), 1) # 2.) The latent loss, which is defined as the Kullback Leibler divergence ## between the distribution in latent space induced by the encoder on # the data and some prior. This acts as a kind of regularizer. # This can be interpreted as the number of "nats" required # for transmitting the the latent space distribution given # the prior. latent_loss = -0.5 * tf.reduce_sum( 1 + self.z_sample_logsigma_sq - tf.square(self.z_sample_mean) - tf.exp(self.z_sample_logsigma_sq), 1) # 3.) The reconstruction loss of state at the next time stamp (the negative log probability # of the input under the reconstructed Gaussian distribution # induced by the decoder in the data space). 
# Adding 1e-10 to avoid evaluation of log(0.0) if FLAGS.deterministic_prediction == True: self.x_predict_mean, self.x_predict_logsigma_sq = self._decoder_network( self.network_weights["weights_gener"], self.network_weights["biases_gener"], self.z_predict, share=True) self.log_prob_reconst_next = -0.5 * ( self.input_x_next.get_shape()[1].value * 2 * np.pi + tf.reduce_sum(tf.exp(self.x_predict_logsigma_sq), axis=1) + tf.reduce_sum( tf.square(self.input_x_next - self.x_predict_mean) / tf.exp(self.x_predict_logsigma_sq), axis=1)) + 1e-5 # For numerical stability # Q_eps_next = distributions.MultivariateNormalDiag(self.x_predict_mean, tf.sqrt(tf.exp(self.x_predict_logsigma_sq))) recon_loss -= self.log_prob_reconst_next reconstr_loss = \ -tf.reduce_sum(self.input_x_next * tf.log(1e-5 + self.x_predict_mean) + (1 - self.input_x_next) * tf.log(1e-5 + 1 - self.x_predict_mean), 1) else: ########## Contruct the transition dynamics distribution Q_psi_scale = tf.cholesky( tf.matmul( tf.matmul(self.W_z, tf.diag(tf.exp(self.z_sample_logsigma_sq))), tf.transpose(self.W_z)) + tf.eye(FLAGS.latent_dim)) Q_psi = distributions.MultivariateNormalCholesky( self.z_predict, Q_psi_scale) self.z_predict_sample = Q_psi.sample() ######### ########## The reconstruction loss of state at the next time stamp self.x_predict_mean_next, self.x_predict_logsigma_sq_next = self._decoder_network( self.network_weights["weights_gener"], self.network_weights["biases_gener"], self.z_predict_sample, share=True) Q_eps_next = distributions.MultivariateNormalDiag( self.x_predict_mean_next, tf.sqrt(tf.exp(self.x_predict_logsigma_sq_next))) recon_loss -= tf.reduce_sum( tf.log( Q_eps_next.prob( tf.reshape(self.input_x_next, [-1, FLAGS.input_dim]))) + 1e-5, 1) ########## if FLAGS.dyanmics_KL_constraint == True: ########## KL diverngence between the transition dynamics distribution and the encoder net for x_t+1 self.z_sample_mean_next, self.z_sample_logsigma_sq_next = self._encoder_network( self.network_weights["weights_recog"], self.network_weights["biases_recog"], self.input_x_next, share=True) Q_phi_next = distributions.MultivariateNormalDiag( self.z_sample_mean_next, tf.sqrt(tf.exp(self.z_sample_logsigma_sq_next))) latent_loss += distributions.kl(Q_psi, Q_phi_next) self.cost = tf.reduce_mean(reconstr_loss + latent_loss) # average over batch