Code Example #1
File: bayes_test.py Project: gmaher/bayesnn
def FC_bayes(x, shape, activation, scope, init=1e-3, bias=True):
    """
    initializer for a fully-connected layer with tensorflow
    inputs:
        -shape, (tuple), input,output size of layer
        -activation, (string), activation function to use
        -init, (float), multiplier for random weight initialization
    """
    with tf.variable_scope(scope):
        if init == 'xavier':
            init = np.sqrt(2.0 / (shape[0] + shape[1]))
        # He-style scale; `init` is then reset to the inverse softplus of `factor`,
        # so that the initial softplus(W_sig) equals `factor`.
        factor = np.sqrt(2.0 / shape[0])
        init = np.log(np.exp(factor) - 1)
        W_mu = tf.Variable(tf.zeros(shape), name='W_mu')
        W_sig = tf.Variable(tf.ones(shape) * init, name='W_sig')
        W_sig = tf.log(1.0 + tf.exp(W_sig))
        W_noise = tf.placeholder(shape=shape, dtype=tf.float32, name='W_eps')
        b_mu = tf.Variable(tf.zeros([shape[1]]), name='b_mu')
        b_sig = tf.Variable(tf.ones([shape[1]]) * init, name='b_sig')
        b_sig = tf.log(1.0 + tf.exp(b_sig))
        b_noise = tf.placeholder(shape=shape[1],
                                 dtype=tf.float32,
                                 name='b_eps')

        W_samp = W_mu + W_sig * W_noise
        b_samp = b_mu + b_sig * b_noise

        #reg = tf.log(tf.reduce_prod(W_sig))+tf.log(tf.reduce_prod(b_sig))
        Norm_w = distributions.Normal(loc=W_mu, scale=W_sig)
        Norm_b = distributions.Normal(loc=b_mu, scale=b_sig)
        N01_w = distributions.Normal(loc=tf.zeros(shape=shape),
                                     scale=tf.ones(shape=shape) * factor)
        N01_b = distributions.Normal(loc=tf.zeros(shape=shape[1]),
                                     scale=tf.ones(shape=shape[1]) * factor)

        reg = tf.reduce_sum(distributions.kl(Norm_w,N01_w)) +\
            tf.reduce_sum(distributions.kl(Norm_b,N01_b))
        if activation == 'relu':
            activation = tf.nn.relu
        elif activation == 'sigmoid':
            activation = tf.nn.sigmoid
        elif activation == 'tanh':
            activation = tf.tanh
        else:
            activation = tf.identity
        if bias:
            h = tf.matmul(x, W_samp) + b_samp
        else:
            h = tf.matmul(x, W_samp)
        a = activation(h)
        return a, W_noise, b_noise, reg
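The layer above returns its activation together with the two noise placeholders and a KL regularizer, so the caller has to feed fresh standard-normal noise on every run and add the regularizer to the objective. A minimal, hypothetical usage sketch (not part of the original project), assuming the TensorFlow 1.x environment and imports used in bayes_test.py (numpy, tensorflow, and tf.contrib.distributions as distributions):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
h, W_eps, b_eps, kl_reg = FC_bayes(x, shape=(784, 100), activation='relu', scope='fc1')
# Placeholder objective: any data-fit term plus the KL regularizer would do here.
loss = tf.reduce_mean(tf.reduce_sum(tf.square(h), axis=1)) + 1e-3 * kl_reg

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        x: np.random.rand(32, 784).astype(np.float32),
        # Fresh N(0, 1) noise per run, so each forward pass samples W = W_mu + W_sig * eps.
        W_eps: np.random.randn(784, 100).astype(np.float32),
        b_eps: np.random.randn(100).astype(np.float32),
    }
    print(sess.run(loss, feed_dict=feed))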
Code Example #2
 def calculate_latent_loss(self, latent_weights):
     """ Calculate the latent loss in the form of KL divergence """
     for posterior in self.posteriors:
         # NOTE: set allow_nan=True to prevent a CPU-only Assert operation
         kl_divergence = distributions.kl(posterior, self.prior)
         kl_divergence = tf.reduce_sum(latent_weights * kl_divergence, 1, name='kl_divergence')
         tf.losses.add_loss(tf.reduce_mean(kl_divergence, 0, name='kl_divergence/avg'))
Code Example #3
File: modeling.py Project: NHLBI-BCB/scVAE
    def loss(self):

        # Recognition prior
        p_z_mu = tf.constant(0.0, dtype=tf.float32)
        p_z_sigma = tf.constant(1.0, dtype=tf.float32)
        p_z = Normal(p_z_mu, p_z_sigma)

        # Loss

        ## Reconstruction error
        log_p_x_given_z = tf.reduce_mean(tf.reduce_sum(
            self.p_x_given_z.log_prob(self.x), axis=1),
                                         name='reconstruction_error')
        tf.add_to_collection('losses', log_p_x_given_z)

        ## Regularisation
        KL_qp = tf.reduce_mean(tf.reduce_sum(kl(self.q_z_given_x, p_z),
                                             axis=1),
                               name="kl_divergence")
        tf.add_to_collection('losses', KL_qp)

        # Averaging over samples.
        self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
        tf.add_to_collection('losses', self.loss_op)

        # Add scalar summaries for the losses
        for l in tf.get_collection('losses'):
            tf.summary.scalar(l.op.name, l)
Code Example #4
def gumbel_reparmeterization(logits_z,
                             tau,
                             rnd_sample=None,
                             hard=True,
                             eps=1e-9):
    '''
    The gumbel-softmax reparameterization
    '''
    latent_size = logits_z.get_shape().as_list()[1]

    # Prior
    p_z = d.OneHotCategorical(
        probs=tf.constant(1.0 / latent_size, shape=[latent_size]))
    # p_z = d.RelaxedOneHotCategorical(probs=tf.constant(1.0/latent_size,
    #                                                    shape=[latent_size]),
    #                                  temperature=10.0)
    # p_z = 1.0 / latent_size
    # log_p_z = tf.log(p_z + eps)

    with st.value_type(st.SampleValue()):
        q_z = st.StochasticTensor(
            d.RelaxedOneHotCategorical(temperature=tau, logits=logits_z))
        q_z_full = st.StochasticTensor(d.OneHotCategorical(logits=logits_z))

    reduce_index = [1] if len(logits_z.get_shape().as_list()) == 2 else [1, 2]
    kl = d.kl(q_z_full.distribution, p_z, allow_nan_stats=False)
    if len(shp(kl)) > 1:  # shp() is a project helper returning the tensor's static shape as a list
        return [q_z, tf.reduce_sum(kl, reduce_index)]
    else:
        return [q_z, kl]
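The KL term above is taken between the unrelaxed OneHotCategorical posterior and a uniform OneHotCategorical prior; only the sampling path uses the relaxed (Gumbel-Softmax) distribution. A standalone sketch of just that KL computation (not the full function), assuming TensorFlow 1.x with d = tf.contrib.distributions and a release where the OneHotCategorical-vs-OneHotCategorical KL is registered for distributions.kl:

import tensorflow as tf

d = tf.contrib.distributions

latent_size = 4
logits_z = tf.placeholder(tf.float32, [None, latent_size])   # hypothetical encoder logits

q_z_full = d.OneHotCategorical(logits=logits_z)
p_z = d.OneHotCategorical(
    probs=tf.constant(1.0 / latent_size, shape=[latent_size]))

kl_per_example = d.kl(q_z_full, p_z)   # shape [batch]: one KL(q || uniform) per example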
Code Example #5
 def kl_loss(X_true, X_predict):
     latent_prior = dist.MultivariateNormalDiag(
         [0.] * latent_dimensions, [1.] * latent_dimensions)
     approximate_posterior = dist.MultivariateNormalDiag(
         z_mu, K.sqrt(K.exp(z_ls2)))
     return {
         'kl_loss': K.mean(dist.kl(latent_prior, approximate_posterior))
     }
Code Example #6
 def vae_loss(X_true, X_predict):
     xent_loss = K.sum(
         0.5 * x_ls2 + (tf.square(x - x_mu) / (2.0 * tf.exp(x_ls2))), 1)
     latent_prior = dist.MultivariateNormalDiag(
         [0.] * latent_dimensions, [1.] * latent_dimensions)
     approximate_posterior = dist.MultivariateNormalDiag(
         z_mu, K.sqrt(K.exp(z_ls2)))
     kl_loss = dist.kl(latent_prior, approximate_posterior)
     return xent_loss + kl_loss
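Both Keras-style losses above pass the arguments as dist.kl(latent_prior, approximate_posterior), i.e. KL(p || q); the conventional VAE regularizer is KL(q || p), the order used in most of the other examples on this page. For a diagonal Gaussian posterior and a standard-normal prior, the registered Normal-Normal KL has a simple closed form. A standalone sketch checking it (not part of the projects above), assuming a TensorFlow 1.x release where Normal takes loc/scale and tf.contrib.distributions.kl is still available (it was later renamed kl_divergence):

import tensorflow as tf

dist = tf.contrib.distributions

mu = tf.constant([0.5, -1.0], dtype=tf.float32)
sigma = tf.constant([0.8, 1.2], dtype=tf.float32)

q = dist.Normal(loc=mu, scale=sigma)   # approximate posterior q(z|x)
p = dist.Normal(loc=0.0, scale=1.0)    # standard-normal prior p(z)

kl_lib = dist.kl(q, p)                 # library result, element-wise per latent dimension
# Closed form: KL(N(mu, sigma) || N(0, 1)) = 0.5 * (sigma^2 + mu^2 - 1) - log(sigma)
kl_closed = 0.5 * (tf.square(sigma) + tf.square(mu) - 1.0) - tf.log(sigma)

with tf.Session() as sess:
    print(sess.run([kl_lib, kl_closed]))   # the two vectors should agree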
Code Example #7
File: vae.py Project: joshloyal/Vary
def variational_autoencoder(features,
                            n_latent_dim=2,
                            hidden_units=[500, 500],
                            normalizing_flow='identity',
                            flow_n_iter=2,
                            kl_weight=1.0,
                            random_state=123):
    features = tensor_utils.to_tensor(features, dtype=tf.float32)
    kl_weight = tensor_utils.to_tensor(kl_weight, dtype=tf.float32)

    n_features = tensor_utils.get_shape(features)[1]
    with tf.variable_scope('inference_network'):
        q_mu, q_sigma = ops.gaussian_inference_network(
            x=features, n_latent_dim=n_latent_dim, hidden_units=hidden_units)
        #q_mu, q_chol = ops.mvn_inference_network(x=features,
        #                                         n_latent_dim=n_latent_dim,
        #                                         hidden_units=hidden_units)

    # set up the latent variables
    with tf.variable_scope('latent_samples'):
        with st.value_type(st.SampleValue()):
            q_z = st.StochasticTensor(dist=distributions.Normal(mu=q_mu,
                                                                sigma=q_sigma),
                                      name='q_z')
            #q_z = st.StochasticTensor(
            #    dist=distributions.MultivariateNormalCholesky(
            #        mu=q_mu, chol=q_chol),
            #        name='q_z')

        # transform the sample to a more complex density by performing
        # a normalizing flow transformation
        norm_flow = flow_lib.get_flow(normalizing_flow,
                                      n_iter=flow_n_iter,
                                      random_state=random_state)
        q_z_trans, log_det_jac = norm_flow.transform(q_z, features=features)

    # set up the priors
    with tf.variable_scope('prior'):
        prior = distributions.Normal(mu=np.zeros(n_latent_dim,
                                                 dtype=np.float32),
                                     sigma=np.ones(n_latent_dim,
                                                   dtype=np.float32))

    with tf.variable_scope('generative_network'):
        p_x_given_z = ops.bernoulli_generative_network(
            z=q_z_trans, hidden_units=hidden_units, n_features=n_features)

    # set up elbo
    log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(features), 1)
    kl = tf.reduce_sum(distributions.kl(q_z.distribution, prior), 1)
    neg_elbo = -tf.reduce_mean(log_likelihood + log_det_jac - kl_weight * kl,
                               0)

    return q_mu, tf.identity(neg_elbo, name='neg_elbo')
Code Example #8
def build_reparam_kl_loss_and_gradients(inference, var_list):
    """Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -\\text{ELBO} =  - ( \mathbb{E}_{q(z; \lambda)} [ \log p(x \mid z) ]
          - \\text{KL}(q(z; \lambda) \| p(z)) )

  based on the reparameterization trick (Kingma and Welling, 2014).

  It assumes the KL is analytic.

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.
  """
    p_log_lik = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        # Form dictionary in order to replace conditioning on prior or
        # observed variable with conditioning on a specific value.
        scope = 'inference_' + str(id(inference)) + '/' + str(s)
        dict_swap = {}
        for x, qx in six.iteritems(inference.data):
            if isinstance(x, RandomVariable):
                if isinstance(qx, RandomVariable):
                    qx_copy = copy(qx, scope=scope)
                    dict_swap[x] = qx_copy.value()
                else:
                    dict_swap[x] = qx

        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope=scope)
            dict_swap[z] = qz_copy.value()

        for x in six.iterkeys(inference.data):
            if isinstance(x, RandomVariable):
                x_copy = copy(x, dict_swap, scope=scope)
                p_log_lik[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) *
                    x_copy.log_prob(dict_swap[x]))

    p_log_lik = tf.stack(p_log_lik)

    kl = tf.reduce_sum([
        inference.kl_scaling.get(z, 1.0) * tf.reduce_sum(ds.kl(qz, z))
        for z, qz in six.iteritems(inference.latent_vars)
    ])

    loss = -(tf.reduce_mean(p_log_lik) - kl)

    grads = tf.gradients(loss, [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
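A toy, self-contained sketch of the same kind of objective outside the inference framework used above: a scalar latent with a Normal prior and posterior, where the likelihood term is Monte Carlo estimated from reparameterized samples and the KL term is analytic via the registered Normal-Normal divergence. Assumes a TensorFlow 1.x release where Normal takes loc/scale and tf.contrib.distributions.kl is available:

import tensorflow as tf

ds = tf.contrib.distributions

x_obs = tf.constant([0.8, 1.1, 0.9], dtype=tf.float32)            # toy observed data
qz = ds.Normal(loc=tf.Variable(0.0), scale=tf.nn.softplus(tf.Variable(0.0)))
pz = ds.Normal(loc=0.0, scale=1.0)

n_samples = 5
z = qz.sample(n_samples)                                           # reparameterized samples
# log p(x | z) with x ~ Normal(z, 1): sum over data points, average over samples
log_lik = tf.reduce_mean(tf.reduce_sum(
    ds.Normal(loc=tf.expand_dims(z, 1), scale=1.0).log_prob(x_obs), axis=1))
kl = tf.reduce_sum(ds.kl(qz, pz))                                  # analytic KL(q || p)
loss = -(log_lik - kl)                                             # -ELBO, as in the code above

train_op = tf.train.AdamOptimizer(0.1).minimize(loss)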
Code Example #9
def build_score_kl_loss_and_gradients(inference, var_list):
    """Build loss function and gradients based on the score function
  estimator (Paisley et al., 2012).

  It assumes the KL is analytic.

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.
  """
    p_log_lik = [0.0] * inference.n_samples
    q_log_prob = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        # Form dictionary in order to replace conditioning on prior or
        # observed variable with conditioning on a specific value.
        scope = 'inference_' + str(id(inference)) + '/' + str(s)
        dict_swap = {}
        for x, qx in six.iteritems(inference.data):
            if isinstance(x, RandomVariable):
                if isinstance(qx, RandomVariable):
                    qx_copy = copy(qx, scope=scope)
                    dict_swap[x] = qx_copy.value()
                else:
                    dict_swap[x] = qx

        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope=scope)
            dict_swap[z] = qz_copy.value()
            q_log_prob[s] += tf.reduce_sum(
                inference.scale.get(z, 1.0) *
                qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

        for x in six.iterkeys(inference.data):
            if isinstance(x, RandomVariable):
                x_copy = copy(x, dict_swap, scope=scope)
                p_log_lik[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) *
                    x_copy.log_prob(dict_swap[x]))

    p_log_lik = tf.stack(p_log_lik)
    q_log_prob = tf.stack(q_log_prob)

    kl = tf.reduce_sum([
        inference.kl_scaling.get(z, 1.0) * tf.reduce_sum(ds.kl(qz, z))
        for z, qz in six.iteritems(inference.latent_vars)
    ])

    loss = -(tf.reduce_mean(p_log_lik) - kl)
    grads = tf.gradients(
        -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl),
        [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
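A toy, self-contained sketch of the same score-function surrogate outside the inference framework used above: a Bernoulli latent (whose samples cannot be reparameterized), a Normal likelihood, and an analytic KL term, so the gradient flows only through log q(z). Assumes TensorFlow 1.x with ds = tf.contrib.distributions:

import tensorflow as tf

ds = tf.contrib.distributions

logit = tf.Variable(0.0)                         # variational parameter of q(z)
qz = ds.Bernoulli(logits=logit)
pz = ds.Bernoulli(logits=0.0)                    # uniform prior over {0, 1}

z = tf.stop_gradient(qz.sample(10))              # samples are treated as constants
z_float = tf.to_float(z)
log_lik = ds.Normal(loc=z_float, scale=1.0).log_prob(1.0)   # log p(x=1 | z), per sample
q_log_prob = qz.log_prob(z)                      # log q(z); differentiable in `logit`
kl = ds.kl(qz, pz)                               # analytic KL(q || p)

# Surrogate whose gradient is the score-function estimate of -(E[log p(x|z)] - KL)
surrogate = -(tf.reduce_mean(q_log_prob * tf.stop_gradient(log_lik)) - kl)
train_op = tf.train.AdamOptimizer(0.1).minimize(surrogate)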
Code Example #10
    def define_model(self,
                     graph,
                     sample_size=20,
                     samples=1,
                     recognition=None,
                     reuse=None,
                     **kwargs):
        """
        Define a VariationalAutoencoderModel.

        For more details see Auto-Encoding Variational Bayes:
        https://arxiv.org/pdf/1312.6114v10.pdf

        Args:
            sample_size: The size of the samples from the approximate posterior
            samples: The number of samples from the approximate posterior
            recognition: Model to generate q(z|x). Required parameter when defining
                the model, but can be set later on the VariationalAutoencoderModel.
            reuse: Whether to reuse variables

        Returns:
            A VariationalAutoencoderModel
        """
        if recognition is None:
            raise TypeError(
                'define_model() needs keyword only argument recognition')

        with tf.variable_scope('mean', reuse=reuse):
            mean = self.linear_layers(recognition.output_tensor, (sample_size),
                                      reuse=reuse)[-1]

        with tf.variable_scope('log_variance', reuse=reuse):
            log_variance = self.linear_layers(recognition.output_tensor,
                                              (sample_size),
                                              reuse=reuse)[-1]

        p_z = distributions.Normal(0.0, 1.0, name='P_z')
        q_z = distributions.Normal(mean,
                                   tf.sqrt(tf.exp(log_variance)),
                                   name='Q_z')

        posterior = tf.reduce_mean(q_z.sample(samples), 0)
        kl_divergence = tf.reduce_sum(distributions.kl(q_z, p_z), 1)
        return VariationalAutoencoderModel(graph, recognition, posterior,
                                           kl_divergence)
Code Example #11
    def kl_categorical(p=None, q=None, p_logits=None, q_logits=None, eps=1e-6):
        '''
        Given p and q (provided EITHER both as logits or both as softmax
        probabilities), returns the KL divergence between them.

        Utilizes an eps in order to avoid divide-by-zero / log(0) issues.
        '''
        if p_logits is not None and q_logits is not None:
            Q = distributions.Categorical(logits=q_logits, dtype=tf.float32)
            P = distributions.Categorical(logits=p_logits, dtype=tf.float32)
        elif p is not None and q is not None:
            print('p shp = {} | q shp = {}'.format(
                p.get_shape().as_list(), q.get_shape().as_list()))
            Q = distributions.Categorical(probs=q + eps, dtype=tf.float32)
            P = distributions.Categorical(probs=p + eps, dtype=tf.float32)
        else:
            raise Exception("please provide either logits or dists")

        return distributions.kl(P, Q)
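A minimal, hypothetical usage sketch of the helper above (not from the original project), treating it as a standalone function and assuming TensorFlow 1.x with distributions = tf.contrib.distributions in scope. The logits values are made up for illustration:

import tensorflow as tf

p_logits = tf.constant([[2.0, 0.5, -1.0]])   # hypothetical predicted logits
q_logits = tf.zeros([1, 3])                  # uniform target distribution

kl_pq = kl_categorical(p_logits=p_logits, q_logits=q_logits)

with tf.Session() as sess:
    print(sess.run(kl_pq))   # one KL(P || Q) value per row of logits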
Code Example #12
def gaussian_reparmeterization(logits_z, rnd_sample=None):
    '''
    The vanilla Gaussian reparameterization from Kingma et al.:

    z = mu + sigma * N(0, I)
    '''
    zshp = logits_z.get_shape().as_list()
    assert zshp[1] % 2 == 0
    # First half of the logits parameterizes sigma (via softplus), second half mu.
    q_sigma = 1e-6 + tf.nn.softplus(logits_z[:, 0:zshp[1] // 2])
    q_mu = logits_z[:, zshp[1] // 2:]

    # Prior
    p_z = d.Normal(loc=tf.zeros(zshp[1] // 2), scale=tf.ones(zshp[1] // 2))

    with st.value_type(st.SampleValue()):
        q_z = st.StochasticTensor(d.Normal(loc=q_mu, scale=q_sigma))

    reduce_index = [1] if len(zshp) == 2 else [1, 2]
    kl = d.kl(q_z.distribution, p_z, allow_nan_stats=False)
    return [q_z, tf.reduce_sum(kl, reduce_index)]
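A minimal, hypothetical usage sketch (not from the original project), assuming the same aliases as above (d = tf.contrib.distributions, st = tf.contrib.bayesflow.stochastic_tensor) and a TensorFlow 1.x release where those contrib modules are available. The encoder head producing logits_z is left as a placeholder:

import tensorflow as tf

latent_dim = 8
# Hypothetical encoder output with 2 * latent_dim columns: the first half
# parameterizes sigma (via softplus), the second half mu, as in the function above.
logits_z = tf.placeholder(tf.float32, [None, 2 * latent_dim])

q_z, kl_per_example = gaussian_reparmeterization(logits_z)
# q_z would be fed to the decoder network; kl_per_example has shape [batch].
kl_term = tf.reduce_mean(kl_per_example)   # average analytic KL(q(z|x) || N(0, I)) over the batch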
Code Example #13
def network_train():
    with tf.variable_scope('data'):
        x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    with tf.name_scope('variational'):
        q_mu, q_sigma = Encoder(x,
                                latent_dim=FLAGS.latent_dim,
                                hidden_size=FLAGS.hidden_size)
        q_z = distributions.Normal(loc=q_mu, scale=q_sigma)
        assert q_z.reparameterization_type == distributions.FULLY_REPARAMETERIZED
    with tf.variable_scope('model'):
        p_xIz_logits = Decoder(q_z.sample(), hidden_size=FLAGS.hidden_size)
        p_xIz = distributions.Bernoulli(logits=p_xIz_logits)
        posterior_predictive_samples = p_xIz.sample()
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.Normal(loc=np.zeros(FLAGS.latent_dim,
                                                dtype=np.float32),
                                   scale=np.ones(FLAGS.latent_dim,
                                                 dtype=np.float32))
        p_z_sample = p_z.sample(FLAGS.n_samples)
        p_xIz_logits = Decoder(p_z_sample, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_samples = prior_predictive.sample()
    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_xIz_logits = Decoder(z_input, hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_inp_sample = prior_predictive_inp.sample()
    kl = tf.reduce_sum(distributions.kl(q_z, p_z), 1)
    e_log_likelihood = tf.reduce_sum(p_xIz.log_prob(x), [1, 2, 3])
    elbo = tf.reduce_sum(e_log_likelihood - kl, 0)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(-elbo)
    init_op = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init_op)
    mnist = read_data_sets(FLAGS.data_dir)
    print('Saving images to: %s' % FLAGS.fig_dir)
    plot_elbo = []
    for i in range(FLAGS.n_episodes):
        batch_x, _ = mnist.train.next_batch(FLAGS.batch_size)
        batch_x = batch_x.reshape(FLAGS.batch_size, 28, 28, 1)
        batch_x = (batch_x > 0.5).astype(np.float32)
        sess.run(optimizer, {x: batch_x})
        batch_elbo = sess.run(elbo, {x: batch_x})
        plot_elbo.append(batch_elbo / float(FLAGS.batch_size))
        if i % 1000 == 0:
            batch_elbo = sess.run(elbo, {x: batch_x})
            print('Episode: {0:d} ELBO: {1: .3f}'.format(
                i, batch_elbo / FLAGS.batch_size))
            batch_posterior_predictive_samples, batch_prior_predictive_samples = sess.run(
                [posterior_predictive_samples, prior_predictive_samples],
                {x: batch_x})
            for k in range(FLAGS.n_samples):
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_data_%d.jpg' % (i, k))
                imsave(f_name, batch_x[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_posterior_%d.jpg' % (i, k))
                imsave(f_name, batch_posterior_predictive_samples[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_prior_%d.jpg' % (i, k))
                imsave(f_name, batch_prior_predictive_samples[k, :, :, 0])
    plt.plot(range(len(plot_elbo)), plot_elbo)
    plt.show()
Code Example #14
 def _kl(self, utils1, utils2, e1, e2=None):
     e2 = e1 if e2 is None else e2
     dist1 = self._dist(utils1, e1)
     dist2 = self._dist(utils2, e2)
     return tf_dists.kl(dist1, dist2)[..., None]
Code Example #15
def train():
    # Input placeholders
    with tf.name_scope('arr'):
        x = tf.placeholder(tf.float32, [None, input_dim, 1])
        # x = tf.placeholder(tf.float32, [None, input_dim,1])
        # tf.summary.image('arr', x)

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)
        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.MultivariateNormalDiag(mu=q_mu,
                                                     diag_stdev=q_sigma))

    with tf.variable_scope('model'):
        # The likelihood is a diagonal Gaussian with mean and std. dev. given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=q_z, hidden_size=FLAGS.hidden_size)
        # p_x_given_z_normal = generative_network(z=q_z,
        #                                         hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        posterior_predictive_samples = p_x_given_z.sample()
        # tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32))

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.MultivariateNormalDiag(
            mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.n_samples)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=p_z_sample, hidden_size=FLAGS.hidden_size)
        # p_x_given_z_normal = generative_network(z=p_z_sample,
        #                                         hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_samples = prior_predictive.sample()
        # tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32))

    # Take samples from the prior with a placeholder
    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=z_input, hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_inp_sample = prior_predictive_inp.sample()

        #################################################################################################
        # for i in range(FLAGS.n_iterations * (n_samples2 // FLAGS.batch_size)):
        #     offset = (i) % (n_samples2 // FLAGS.batch_size)
        #     np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1)
        #     #
        #     np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size]
        #     #  # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1)
        #     #  # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32)

        # Build the evidence lower bound (ELBO) or the negative loss

        # kl = -0.5*tf.reduce_sum(1 + q_sigma - tf.square(q_mu) - tf.exp(q_sigma), reduction_indices=1)
        # kl = tf.reduce_sum(distributions.kl(q_z.distribution, p_z), 0)
        kl = distributions.kl(q_z.distribution, p_z)
        #Original
        expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)

        #expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(x),
        #                                        [1, 2, 3])

        elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001)
        # optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(-elbo)
        # train_op = optimizer.minimize(elbo)

        # Merge all the summaries
        tf.summary.scalar("ELBO", elbo)
        summary_op = tf.summary.merge_all()

        init_op = tf.global_variables_initializer()

        # Run training
        sess = tf.InteractiveSession()
        sess.run(init_op)

        print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
        train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

        # Get fixed MNIST digits for plotting posterior means during training
        # for i in range(FLAGS.n_iterations*(n_samples2//FLAGS.batch_size)):
        #    offset = (i)%(n_samples2//FLAGS.batch_size)
        #    np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1)
        # #
        #    np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size]
        # #  # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1)
        # #  # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32)

        for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)):
            offset = (i) % (n_samples // FLAGS.batch_size)
            # Re-binarize the data at every batch; this improves results
            # Original
            #np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1,input_dim, 1)
            np_x = arr[offset * FLAGS.batch_size:(offset + 1) *
                       FLAGS.batch_size].reshape(-1, input_dim, 1)

            np_y = training_label[offset * FLAGS.batch_size:(offset + 1) *
                                  FLAGS.batch_size].reshape(-1, 2, 1)
            # a = np.argmax(np_y,1)
            # np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1)
            # np_x = (np_x > 0.5).astype(np.float32)
            sess.run(train_op, {x: np_x})

            # Print progress and save samples every so often
            t0 = time.time()
            if i % FLAGS.print_every == 0:
                np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
                train_writer.add_summary(summary_str, i)
                print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.
                      format(
                          i, np_elbo / FLAGS.batch_size, FLAGS.batch_size *
                          FLAGS.print_every / (time.time() - t0)))
                t0 = time.time()

            #   # Save samples
            # np_posterior_samples, np_prior_samples = sess.run(
            #       [posterior_predictive_samples, prior_predictive_samples], {x: np_x})
            # for k in range(FLAGS.n_samples):
            #   f_name = os.path.join(
            #       FLAGS.logdir, 'iter_%d_posterior_predictive_%d_data.jpg' % (i, k))
            #   imsave(f_name, np_x[k, :, :, 0])
            #   f_name = os.path.join(
            #       FLAGS.logdir, 'iter_%d_posterior_predictive_%d_sample.jpg' % (i, k))
            #   imsave(f_name, np_posterior_samples[k, :, :, 0])
            #   f_name = os.path.join(
            #       FLAGS.logdir, 'iter_%d_prior_predictive_%d.jpg' % (i, k))
            #   imsave(f_name, np_prior_samples[k, :, :, 0])

            # For Plot using matplotlib
            if FLAGS.latent_dim == 2:
                np_q_mu = sess.run(q_mu, {x: np_x})
                cmap = plt.get_cmap('jet', 2)
                # cmap = mpl.colors.ListedColormap(sns.color_palette("husl"))
                f, ax = plt.subplots(1, figsize=(6 * 1.1618, 6))
                im = ax.scatter(np_q_mu[:, 0],
                                np_q_mu[:, 1],
                                c=np.argmax(np_y, 1),
                                cmap=cmap,
                                alpha=0.7)
                # im = ax.scatter(np_q_mu[:, 0], np_q_mu[:, 1], c=np.argmax(np_y, 1), cmap='RdBu', alpha=0.7)

                ax.set_xlabel(
                    'First dimension of sampled latent variable $z_1$')
                ax.set_ylabel(
                    'Second dimension of sampled latent variable mean $z_2$')
                ax.set_xlim([-3, 1])
                ax.set_ylim([-3, 1])
                f.colorbar(im, ax=ax, label='Patient or not')
                plt.tight_layout()

                if i % FLAGS.print_every == 0:
                    plt.savefig(
                        os.path.join(
                            FLAGS.logdir,
                            'posterior_predictive_map_frame_%d.png' % i))
                plt.close()
Code Example #16
def train():
    # Input placeholders
    with tf.name_scope('arr'):
        x = tf.placeholder(tf.float32, [None, input_dim, 1])
        # x = tf.placeholder(tf.float32, [None, input_dim,1])
        # tf.summary.image('arr', x)

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)
        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.MultivariateNormalDiag(mu=q_mu,
                                                     diag_stdev=q_sigma))

    with tf.variable_scope('model'):
        # The likelihood is a diagonal Gaussian with mean and std. dev. given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=q_z, hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        posterior_predictive_samples = p_x_given_z.sample()
        # tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32))

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.MultivariateNormalDiag(
            mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.n_samples)
        p_z_sample2 = p_z.sample_n(n_samples2)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=p_z_sample2, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_samples = prior_predictive.sample()
        # tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32))

    # Take samples from the prior with a placeholder
    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=z_input, hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)

        prior_predictive_inp_sample = prior_predictive_inp.sample()

        #################################################################################################
        # for i in range(FLAGS.n_iterations * (n_samples2 // FLAGS.batch_size)):
        #     offset = (i) % (n_samples2 // FLAGS.batch_size)
        #     np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim, 1)
        #     #
        #     np_y = test_label[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size]
        #     #  # np_x_fixed = np_x_fixed.reshape((FLAGS.batch_size), 116, 1)
        #     #  # np_x_fixed = (np_x_fixed > 0.5).astype(np.float32)

        # Build the evidence lower bound (ELBO) or the negative loss
        kl = distributions.kl(q_z.distribution, p_z)
        expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
        elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(-elbo)
        # train_op = optimizer.minimize(elbo)
        tf.summary.scalar("ELBO", elbo)

        # Merge all the summaries

        summary_op = tf.summary.merge_all()

        init_op = tf.global_variables_initializer()

        # Run training
        sess = tf.InteractiveSession()
        sess.run(init_op)

        print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
        train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

        for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)):
            offset = (i) % (n_samples // FLAGS.batch_size)
            # Re-binarize the data at every batch; this improves results
            # Original
            #np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1,input_dim, 1)
            np_x = arr2[offset * FLAGS.batch_size:(offset + 1) *
                        FLAGS.batch_size].reshape(-1, input_dim, 1)
            np_y = test_label[offset * FLAGS.batch_size:(offset + 1) *
                              FLAGS.batch_size].reshape(-1, 2, 1)
            # np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1)
            # np_x = (np_x > 0.5).astype(np.float32)
            sess.run(train_op, {x: np_x})

            # Print progress and save samples every so often
            t0 = time.time()
            if i % FLAGS.print_every == 0:
                np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
                train_writer.add_summary(summary_str, i)
                print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.
                      format(
                          i, np_elbo / FLAGS.batch_size, FLAGS.batch_size *
                          FLAGS.print_every / (time.time() - t0)))
                t0 = time.time()

                # Save samples
                #   np_prior_samples = sess.run(prior_predictive_samples, {x: np_x})
                # if __name__ == "__main__":

                print "Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset."
                print "Running example oz_inputn 2,500 MNIST digits..."
                a = (n_samples2, FLAGS.latent_dim)
                np_z = np.zeros(a, dtype=np.float32)
                print(np_z.shape)
                np_z_sample2 = sess.run(p_z_sample2)  # feed_dict needs concrete values, not a Tensor
                X = sess.run(prior_predictive_inp_sample,
                             {z_input: np_z_sample2})
                # X = z_input.eval()
                # X = p_z_sample2.eval()
                # X = X.reshape(-1, FLAGS.latent_dim)
                # X= prior_predictive_samples.eval().reshape(-1, input_dim)
                # X =  prior_predictive_inp_sample.eval()
                print(X.shape)
                labels = test_label
                print(labels.shape)
                Y = tsne.tsne(X, 2, 20, 30.0)
                np.save('Error.npy', Y)
                cmap = plt.get_cmap('jet', 2)
                plt.scatter(Y[:, 0],
                            Y[:, 1],
                            20,
                            c=np.argmax(labels, 1),
                            cmap=cmap)
                plt.savefig(
                    os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i))
                # plt.scatter(Y[:,0], Y[:,1], 20, c=np.argmax(np_y, 1), cmap=cmap )
                plt.close()
Code Example #17
def train():
    # Input placeholders
    with tf.name_scope('arr'):
        x = tf.placeholder(tf.float32, [None, input_dim])

    with tf.name_scope('data_for_oneZ'):
        y = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.latent_dim*2])

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)


        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(distributions.MultivariateNormalDiag(mu=q_mu, diag_stdev=q_sigma))

    with tf.variable_scope('variational_2'):

        separated_mu = y[:, :FLAGS.latent_dim]
        separated_sigma = y[:, FLAGS.latent_dim:]
        q_mu_y, q_sigma_y = TimeTrajectory_foroneZ(a=separated_mu,b=separated_sigma)

        with st.value_type(st.SampleValue()):
        # The variational distribution is a Normal with mean and standard
        # deviation given by the inference network
            q_z_2 = st.StochasticTensor(distributions.MultivariateNormalDiag(mu=q_mu_y, diag_stdev=q_sigma_y))


    with tf.variable_scope('model'):
        # The likelihood is a diagonal Gaussian with mean and std. dev. given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=q_z,
                                                               hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)


    with tf.variable_scope('model_2'):
        p_x_given_z_mu_2, p_x_given_z_sigma_2 = generative_network(z=q_z_2,
                                                               hidden_size=FLAGS.hidden_size)
        p_x_given_z_2 = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu_2, diag_stdev=p_x_given_z_sigma_2)

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.MultivariateNormalDiag(mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
                                                   diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.latent_dim*samples_for_data)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=p_z_sample,
                                                               hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)

    with tf.variable_scope('model_2', reuse=True):
        p_z_2 = distributions.MultivariateNormalDiag(mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
                                                       diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample_2 = p_z_2.sample_n(FLAGS.latent_dim*samples_for_data)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(z=p_z_sample_2,
                                                                   hidden_size=FLAGS.hidden_size)
        prior_predictive_2 = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_inp_sample = prior_predictive_2.sample()

        # Take samples from the prior with a placeholder
        # with tf.variable_scope('model', reuse=True):
        #   z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        #   p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input,
        #                                           hidden_size=FLAGS.hidden_size)
        #   prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma)
        #
        #   prior_predictive_inp_sample = prior_predictive_inp.sample()

        #################################################################################################

        # Build the evidence lower bound (ELBO) or the negative loss
        kl = distributions.kl(q_z.distribution, p_z)
        expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
        elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001)
        train_op = optimizer.minimize(-elbo)
        # train_op = optimizer.minimize(elbo)
        tf.summary.scalar("ELBO", elbo)


        # Merge all the summaries
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()

        # Run training
        sess = tf.InteractiveSession()
        sess.run(init_op)

        print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
        train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)



        for i in range(FLAGS.n_iterations * (samples_for_data // FLAGS.batch_size)):
            offset = (i) % (samples_for_data // FLAGS.batch_size)
            # Re-binarize the data at every batch; this improves results
            # Original
            # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim)
            np_x = arr[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim)
            np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size]

            # _, q_mu_out, q_sigma_out = sess.run([train_op, q_mu, q_sigma], {x: np_x})
            sess.run(train_op, {x: np_x})


            t0 = time.time()
            if i % FLAGS.print_every == 0:
                np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
                train_writer.add_summary(summary_str, i)
                print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format(
                    i, np_elbo / FLAGS.batch_size,
                    FLAGS.batch_size * FLAGS.print_every / (time.time() - t0)))

                t0 = time.time()

        # curr=[]

        if i in range((FLAGS.n_iterations-1) *(samples_for_data//FLAGS.batch_size), (FLAGS.n_iterations) *(samples_for_data//FLAGS.batch_size)):
            z_mu, z_sigma = sess.run([q_mu, q_sigma], {x: np_x})
            concat_z_parameters = np.concatenate([z_mu, z_sigma], 1)
            # Sparsed_Z_mean = sess.run(prior_predictive_inp_sample, {y : concat_z_parameters})
            Sparsed_Z_mean = sess.run(prior_predictive_inp_sample, {y: concat_z_parameters})

            print(Sparsed_Z_mean.shape)

            Reshaped_Z_mean= np.reshape(Sparsed_Z_mean,(62,1300,-1))
            print(Reshaped_Z_mean.shape)
Code Example #18
def train():
    # Input placeholders
    with tf.name_scope('arr'):
        x = tf.placeholder(tf.float32, [None, input_dim])

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)

        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.MultivariateNormalDiag(mu=q_mu,
                                                     diag_stdev=q_sigma))

    with tf.variable_scope('model'):
        # The likelihood is a diagonal Gaussian with mean and std. dev. given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=q_z, hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        posterior_predictive_samples = p_x_given_z.sample()

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.MultivariateNormalDiag(
            mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.n_samples)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=p_z_sample, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_samples = prior_predictive.sample()

        # Take samples from the prior with a placeholder
        # with tf.variable_scope('model', reuse=True):
        #   z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        #   p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input,
        #                                           hidden_size=FLAGS.hidden_size)
        #   prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma)
        #
        #   prior_predictive_inp_sample = prior_predictive_inp.sample()

        #################################################################################################

        # Build the evidence lower bound (ELBO) or the negative loss
        kl = distributions.kl(q_z.distribution, p_z)
        expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
        elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001)
        train_op = optimizer.minimize(-elbo)
        # train_op = optimizer.minimize(elbo)
        tf.summary.scalar("ELBO", elbo)

        # Merge all the summaries
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()

        # Run training
        sess = tf.InteractiveSession()
        sess.run(init_op)

        print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
        train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

        for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)):
            offset = (i) % (n_samples // FLAGS.batch_size)
            # Re-binarize the data at every batch; this improves results
            # Original
            np_x_tsne = arr
            # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim)
            np_x = arr[offset * FLAGS.batch_size:(offset + 1) *
                       FLAGS.batch_size].reshape(-1, input_dim)
            np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) *
                                FLAGS.batch_size]

            # _, q_mu_out, q_sigma_out = sess.run([train_op, q_mu, q_sigma], {x: np_x})
            sess.run(train_op, {x: np_x})

            t0 = time.time()
            if i % FLAGS.print_every == 0:
                np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
                train_writer.add_summary(summary_str, i)
                print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.
                      format(
                          i, np_elbo / FLAGS.batch_size, FLAGS.batch_size *
                          FLAGS.print_every / (time.time() - t0)))

                t0 = time.time()
#             # print(range((FLAGS.n_iterations-2) *(n_samples//FLAGS.batch_size), (FLAGS.n_iterations-1) *(n_samples//FLAGS.batch_size)))
            if i in range(
                (FLAGS.n_iterations - 1) * (n_samples // FLAGS.batch_size),
                (FLAGS.n_iterations) * (n_samples // FLAGS.batch_size)):
                # if offset==0 and i!=0:

                print "Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset."
                print "Running example of_input ADNI..."
                # X = sess.run(q_mu, {x: np_x})
                X, q_sigma_out = sess.run([q_mu, q_sigma], {x: np_x})
                # X, q_sigma_out = sess.run([q_mu, q_sigma], {x: np_x_tsne})

                # np.savetxt('inferenced_z_mu_%d'%i, X)
                # np.savetxt('inferenced_z_sigma_%d'%i, q_sigma_out)

                labels_tsne = np.argmax(np_y_fixed, 1)

                Y = tsne.tsne(X, 2, 20, 20.0)
                # np.savetxt('tsne_Y_values_%d.txt'% i, Y)
                #                 # cmap = plt.get_cmap('bwr')
                #
                fig = plt.figure(facecolor="white", figsize=(10.0, 8.0))
                plt.xlim(-150.0, 150.0)
                plt.ylim(-150.0, 150.0)
                plt.axis("off")

                if labels_tsne[0] == 1:
                    plt.scatter(Y[:, 0],
                                Y[:, 1],
                                20,
                                c=labels_tsne,
                                cmap=mpl.colors.ListedColormap('red'))
                else:
                    plt.scatter(Y[:, 0],
                                Y[:, 1],
                                20,
                                c=labels_tsne,
                                cmap=mpl.colors.ListedColormap('blue'))

                plt.savefig(
                    os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i))
                #
                #
                # ##########################################################################################################################################################
                # fig = plt.figure(facecolor="white", figsize=(15.0, 10.0))
                # scat = plt.scatter(Y[:, 0], Y[:, 1], 20, c = labels_tsne, cmap=mpl.colors.ListedColormap('black'))
                scat = plt.scatter([], [], c='white')

                def initiation():
                    scat.set_offsets([])
                    return scat,

                def animate(t):
                    x_ani = Y[:, 0].transpose()
                    y_ani = Y[:, 1].transpose()
                    data_ani = np.hstack(
                        (x_ani[t:, np.newaxis], y_ani[t:, np.newaxis]))
                    # print (data_ani)
                    scat.set_offsets(data_ani)
                    return scat,

#                 # ims = []
#                 # timepoint = []
#                 #
#                 # for a in scat():
#                 #     timepoint.append(a)
#                 #     ims.append(timepoint)
#

                ani = animation.FuncAnimation(fig,
                                              animate,
                                              init_func=initiation,
                                              frames=FLAGS.batch_size + 17,
                                              interval=200,
                                              blit=True)
                # plt.show()

                Writer = animation.writers['ffmpeg']
                writer = Writer(
                    fps=13,
                    metadata=dict(
                        artist='Kang, Eun Song (Korea University MiLab)'),
                    bitrate=1800)

                # ani.save("test_%d.mov" %i, writer=writer, dpi=300)
                ani.save(os.path.join(FLAGS.logdir, 'test_%d.mov' % i),
                         writer=writer,
                         dpi=300)
Code Example #19
def train():
    # Input placeholders
    with tf.name_scope('arr'):
        x = tf.placeholder(tf.float32, [None, input_dim])

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)
        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.MultivariateNormalDiag(mu=q_mu,
                                                     diag_stdev=q_sigma))

    with tf.variable_scope('model'):
        # The likelihood is a diagonal Gaussian with mean and std. dev. given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=q_z, hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        posterior_predictive_samples = p_x_given_z.sample()

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.MultivariateNormalDiag(
            mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            diag_stdev=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.n_samples)
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=p_z_sample, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.MultivariateNormalDiag(
            mu=p_x_given_z_mu, diag_stdev=p_x_given_z_sigma)
        prior_predictive_samples = prior_predictive.sample()

        # Take samples from the prior with a placeholder
        # with tf.variable_scope('model', reuse=True):
        #   z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        #   p_x_given_z_mu, p_x_given_z_sigma= generative_network(z=z_input,
        #                                           hidden_size=FLAGS.hidden_size)
        #   prior_predictive_inp = distributions.MultivariateNormalDiag(mu=p_x_given_z_mu, diag_stdev = p_x_given_z_sigma)
        #
        #   prior_predictive_inp_sample = prior_predictive_inp.sample()

        #################################################################################################

        # Build the evidence lower bound (ELBO) or the negative loss
        kl = distributions.kl(q_z.distribution, p_z)
        expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
        elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00001)
        train_op = optimizer.minimize(-elbo)
        # train_op = optimizer.minimize(elbo)
        tf.summary.scalar("ELBO", elbo)

        # Merge all the summaries
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()

        # Run training
        sess = tf.InteractiveSession()
        sess.run(init_op)

        print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
        train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

        for i in range(FLAGS.n_iterations * (n_samples // FLAGS.batch_size)):
            offset = (i) % (n_samples // FLAGS.batch_size)
            # Re-binarize the data at every batch; this improves results
            # Original
            # np_x_fixed = arr2[offset * FLAGS.batch_size:(offset + 1) * FLAGS.batch_size].reshape(-1, input_dim)
            np_x = arr[offset * FLAGS.batch_size:(offset + 1) *
                       FLAGS.batch_size].reshape(-1, input_dim)
            np_y_fixed = labels[offset * FLAGS.batch_size:(offset + 1) *
                                FLAGS.batch_size]

            sess.run(train_op, {x: np_x})
            # sess.run(train_op, {x: np_x_fixed})

            t0 = time.time()
            if i % FLAGS.print_every == 0:
                np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
                train_writer.add_summary(summary_str, i)
                print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.
                      format(
                          i, np_elbo / FLAGS.batch_size, FLAGS.batch_size *
                          FLAGS.print_every / (time.time() - t0)))
                t0 = time.time()
            # print(range((FLAGS.n_iterations-2) *(n_samples//FLAGS.batch_size), (FLAGS.n_iterations-1) *(n_samples//FLAGS.batch_size)))
            if i in range(
                (FLAGS.n_iterations - 1) * (n_samples // FLAGS.batch_size),
                (FLAGS.n_iterations) * (n_samples // FLAGS.batch_size)):
                # if offset==0 and i!=0:
                print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
                print("Running t-SNE on the inferred latent means...")
                X = sess.run(q_mu, {x: np_x})
                labels_tsne = np.argmax(np_y_fixed, 1)

                Y = tsne_MDD.tsne(X, 2, 20, 15.0)
                np.save('Error.npy', Y)
                # cmap = plt.get_cmap('bwr')

                plt.xlim(-50.0, 50.0)
                plt.ylim(-50.0, 50.0)

                if labels_tsne[0] == 1:
                    plt.scatter(Y[:, 0],
                                Y[:, 1],
                                20,
                                c=labels_tsne,
                                cmap=mpl.colors.ListedColormap('red'))
                else:
                    plt.scatter(Y[:, 0],
                                Y[:, 1],
                                20,
                                c=labels_tsne,
                                cmap=mpl.colors.ListedColormap('blue'))

                labels_plt = ['{0}'.format(j) for j in range(170)]
                for label, a, b in zip(labels_plt, Y[:, 0], Y[:, 1]):
                    plt.annotate(label,
                                 xy=(a, b),
                                 xytext=(-0.07, 0.07),
                                 textcoords='offset points',
                                 ha='right',
                                 va='bottom',
                                 arrowprops=dict(arrowstyle='->',
                                                 connectionstyle='arc3,rad=0'))

                plt.savefig(
                    os.path.join(FLAGS.logdir, 'tSNE_map_frame_%d.png' % i))
                plt.close()
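For orientation, the objective assembled at the top of this example (the per-example expected_log_likelihood minus kl, summed over the mini-batch) is the usual single-sample evidence lower bound; written out,

\mathrm{ELBO}(x) \;=\; \mathbb{E}_{q(z\mid x)}\!\left[\log p(x\mid z)\right] \;-\; \mathrm{KL}\!\left(q(z\mid x)\,\|\,p(z)\right) \;\le\; \log p(x),

so optimizer.minimize(-elbo) performs gradient ascent on this bound.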
Code example #20
0
File: playground.py Project: jonberliner/siamese
loss_match = d**2.
loss_nomatch = tf.maximum(0., 1. - d)**2.

losses = tf.where(match, loss_match, loss_nomatch)
d_loss = tf.reduce_sum(losses)

c_loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=class_hat1, labels=tf.cast(y1, tf.int32))
c_loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=class_hat2, labels=tf.cast(y2, tf.int32))

if VAE:
    # lx1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=rx1, labels=x1), 1)
    # lx2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=rx2, labels=x2), 1)
    lx1 = tf.reduce_sum(kl(Bernoulli(p=x1), Bernoulli(logits=rx1)), 1)
    lx2 = tf.reduce_sum(kl(Bernoulli(p=x2), Bernoulli(logits=rx2)), 1)
    lz1 = tf.reduce_sum(kl(Normal(qmu1, qv1), Normal(0., 1.)), 1)
    lz2 = tf.reduce_sum(kl(Normal(qmu2, qv2), Normal(0., 1.)), 1)
    loss = tf.reduce_sum(d_loss + c_loss1 + c_loss2 + lx1 + lx2 + lz1 + lz2)
else:
    loss = tf.reduce_sum(d_loss + c_loss1 + c_loss2)

for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='distance'):
    loss += tf.reduce_sum(v**2.)*1e-3
    loss += tf.reduce_sum(tf.abs(v))*1e-3
# for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='classify'):
#     loss += tf.reduce_sum(v**2.)*1e-3
#     loss += tf.reduce_sum(tf.abs(v))*1e-3

trainer = tf.train.AdamOptimizer(1e-3).minimize(loss)
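A small, hypothetical numpy check of the contrastive distance loss above (not part of the playground.py repository; the distances and labels are made up): matched pairs are penalised by the squared distance, unmatched pairs only while they sit inside the unit margin.

import numpy as np

d = np.array([0.2, 0.8, 1.5])           # pairwise embedding distances
match = np.array([True, False, False])  # whether each pair shares a label

loss_match = d ** 2.
loss_nomatch = np.maximum(0., 1. - d) ** 2.
losses = np.where(match, loss_match, loss_nomatch)

print(losses)        # [0.04 0.04 0.  ] -- the far-apart non-match contributes nothing
print(losses.sum())  # analogue of d_loss = tf.reduce_sum(losses)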
Code example #21
0
File: distributions.py Project: JayceeLee/TRPO
 def kl_divergence(self, prob0, prob1):
     return tf.reduce_sum(distributions.kl(prob0, prob1, name='kl_divergence'), axis=1)
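A hedged usage sketch for this helper, assuming the old tf.contrib.distributions API used elsewhere in these examples and factorised Normal policies; the batch of states and the action dimensionality are made up for illustration.

import tensorflow as tf

distributions = tf.contrib.distributions

# Two diagonal-Gaussian policies over a 3-dimensional action space, batch of 2 states.
prob0 = distributions.Normal(loc=tf.zeros([2, 3]), scale=tf.ones([2, 3]))
prob1 = distributions.Normal(loc=tf.ones([2, 3]) * 0.5, scale=tf.ones([2, 3]) * 2.0)

# kl() is evaluated element-wise over the [batch, action_dim] shape, so summing
# over axis 1 yields one KL value per state, exactly as kl_divergence() does above.
per_state_kl = tf.reduce_sum(distributions.kl(prob0, prob1, name='kl_divergence'), axis=1)

with tf.Session() as sess:
    print(sess.run(per_state_kl))  # shape (2,)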
Code example #22
0
def train():
    """ Input placeholders"""
    with tf.name_scope('ROIs'):
        x = tf.placeholder(tf.float32, [None, input_dim])

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size,
                                          layers=FLAGS.hidden_layer,
                                          trainornot=FLAGS.train)

        p_z = distributions.MultivariateNormalDiag(
            loc=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            scale_diag=np.ones(FLAGS.latent_dim, dtype=np.float32))

        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.MultivariateNormalDiag(loc=q_mu,
                                                     scale_diag=q_sigma))

    with tf.variable_scope('generative'):
        # The likelihood is Gaussian-distributed with parameter mu given by the generative network
        p_x_given_z_mu, p_x_given_z_sigma = generative_network(
            z=q_z,
            hidden_size=FLAGS.hidden_size,
            layers=FLAGS.hidden_layer,
            trainornot=FLAGS.train)

        p_x_given_z = distributions.MultivariateNormalDiag(
            loc=p_x_given_z_mu, scale_diag=p_x_given_z_sigma)

    with tf.variable_scope('generative', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_x_given_z_mu_2, p_x_given_z_sigma_2 = generative_network(
            z=z_input,
            hidden_size=FLAGS.hidden_size,
            layers=FLAGS.hidden_layer,
            trainornot=FLAGS.train)
        p_x_given_z_2 = distributions.MultivariateNormalDiag(
            loc=p_x_given_z_mu_2, scale_diag=p_x_given_z_sigma_2)
        prior_predictive = p_x_given_z_2.copy()

        prior_predictive_inp_sample = prior_predictive.sample()

    # Build the evidence lower bound (ELBO) or the negative loss

    # For no regularization term
    # kl = distributions.kl(q_z.distribution, p_z)
    # expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
    # elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

    kl = distributions.kl(q_z.distribution, p_z)
    reg_variables = slim.losses.get_regularization_losses()
    reg_variables_sum = tf.reduce_sum(reg_variables)
    expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), -1)
    # Subtract the weight-decay penalties so that minimizing -elbo also minimizes them
    reg_expected_log_likelihood = expected_log_likelihood - reg_variables_sum
    elbo = tf.reduce_sum(reg_expected_log_likelihood - kl, 0)

    # Optimization
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001)
    train_op = optimizer.minimize(-elbo)
    tf.summary.scalar("ELBO", elbo)

    # Merge all the summaries
    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Run training
    sess = tf.InteractiveSession()
    sess.run(init_op)

    print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
    train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

    randidx = np.random.permutation(
        np.arange(samples_for_data, dtype=np.uint32))
    saver = tf.train.Saver()

    # Mini-batch training loop
    cur_epoch = 0
    t0 = time.time()
    for i in range(
        (FLAGS.n_iterations * samples_for_data) // FLAGS.batch_size):
        offset = (i) % (samples_for_data // FLAGS.batch_size)
        np_x = arr[randidx[offset * FLAGS.batch_size:(offset + 1) *
                           FLAGS.batch_size]].reshape(-1, input_dim).copy()
        sess.run(train_op, {x: np_x})

        # t0 spans the FLAGS.print_every iterations since the previous report
        if i % FLAGS.print_every == 0:
            np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
            train_writer.add_summary(summary_str, i)
            print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format(
                i, np_elbo / FLAGS.batch_size,
                FLAGS.batch_size * FLAGS.print_every / (time.time() - t0)))
            t0 = time.time()

        if cur_epoch != int((i * FLAGS.batch_size) / samples_for_data):
            # print("Saved in path", saver.save(sess, os.path.join(FLAGS.logdir, "%02d.ckpt" % (cur_epoch))))
            randidx = np.random.permutation(samples_for_data)
        cur_epoch = int((i * FLAGS.batch_size) / samples_for_data)
    saver.save(sess, os.path.join(FLAGS.logdir, 'savedmodel_final.ckpt'))
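A hedged toy illustration (plain Python, with made-up numbers) of the sign convention used in the regularised bound above: since train_op minimizes -elbo, a weight-decay penalty has to be subtracted from the ELBO for gradient descent to shrink it rather than grow it.

log_likelihood, kl, reg_penalty = -120.0, 15.0, 3.0

elbo = (log_likelihood - reg_penalty) - kl  # quantity the training op maximises
loss = -elbo                                # 138.0; lowering reg_penalty lowers the loss
print(loss)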
Code example #23
0
# # Imports
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import tensorflow as tf
import tensorflow.contrib.distributions as dis
from keras import backend as K
from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import metrics
from keras.datasets import mnist
from scipy.stats import norm

# # Variational autoencoder (VAE)
Normal = tf.contrib.distributions.Normal
t = dis.kl(Normal(3.0, 2.0), Normal(0.0, 1.0))  #mean, st dev
t2 = dis.kl(Normal(3.0, 1.0), Normal(2.9, 1.0))
t3 = dis.kl(Normal(3.0, 1.0), Normal(3.0, 1.0))

with tf.Session() as session:
    t_val = session.run(t)
    print('KLD(N(3,2), N(0,1)) =', t_val, ', analytic =',
          .5 * (2.0**2 + 3**2 - 1 - np.log(2.0**2)))
    t_val = session.run(t2)
    print('KLD(N(3,1), N(2.9,1)) =', t_val)
    t_val = session.run(t3)
    print('KLD(N(3,1), N(3,1)) =', t_val)
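A hedged analytic cross-check (plain numpy, not from the original notebook) of the three values printed above, using the closed form KL(N(mu1, s1^2) || N(mu2, s2^2)) = log(s2/s1) + (s1^2 + (mu1 - mu2)^2) / (2 s2^2) - 1/2.

import numpy as np

def gaussian_kl(mu1, s1, mu2, s2):
    """Closed-form KL divergence between two univariate Gaussians (s1, s2 are std devs)."""
    return np.log(s2 / s1) + (s1 ** 2 + (mu1 - mu2) ** 2) / (2 * s2 ** 2) - 0.5

print(gaussian_kl(3.0, 2.0, 0.0, 1.0))  # ~5.3069, should match t
print(gaussian_kl(3.0, 1.0, 2.9, 1.0))  # 0.005,   should match t2
print(gaussian_kl(3.0, 1.0, 3.0, 1.0))  # 0.0,     should match t3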

# # Implementing the variational autoencoder

# hyperparameters
Code example #24
0
def _elbo(form, log_likelihood, log_joint, variational_with_prior,
          keep_batch_dim):
  """Internal implementation of ELBO. Users should use `elbo`.

  Args:
    form: ELBOForms constant. Controls how the ELBO is computed.
    log_likelihood: `Tensor` log p(x|Z).
    log_joint: `Tensor` log p(x, Z).
    variational_with_prior: `dict<DistributionTensor, Distribution>`, variational
      distributions to prior distributions.
    keep_batch_dim: bool. Whether to keep the batch dimension when reducing
      the entropy/KL.

  Returns:
    ELBO `Tensor` with same shape and dtype as `log_likelihood`/`log_joint`.
  """
  ELBOForms.check_form(form)

  # Order of preference
  # 1. Analytic KL: log_likelihood - KL(q||p)
  # 2. Analytic entropy: log_likelihood + log p(Z) + H[q], or log_joint + H[q]
  # 3. Sample: log_likelihood - (log q(Z) - log p(Z)) =
  #            log_likelihood + log p(Z) - log q(Z), or log_joint - log q(Z)

  def _reduce(val):
    if keep_batch_dim:
      return val
    else:
      return math_ops.reduce_sum(val)

  kl_terms = []
  entropy_terms = []
  prior_terms = []
  for q, z, p in [(qz.distribution, qz.value(), pz)
                  for qz, pz in variational_with_prior.items()]:
    # Analytic KL
    kl = None
    if log_joint is None and form in {ELBOForms.default, ELBOForms.analytic_kl}:
      try:
        kl = distributions.kl(q, p)
        logging.info("Using analytic KL between q:%s, p:%s", q, p)
      except NotImplementedError as e:
        if form == ELBOForms.analytic_kl:
          raise e
    if kl is not None:
      kl_terms.append(-1. * _reduce(kl))
      continue

    # Analytic entropy
    entropy = None
    if form in {ELBOForms.default, ELBOForms.analytic_entropy}:
      try:
        entropy = q.entropy()
        logging.info("Using analytic entropy for q:%s", q)
      except NotImplementedError as e:
        if form == ELBOForms.analytic_entropy:
          raise e
    if entropy is not None:
      entropy_terms.append(_reduce(entropy))
      if log_likelihood is not None:
        prior = p.log_prob(z)
        prior_terms.append(_reduce(prior))
      continue

    # Sample
    if form in {ELBOForms.default, ELBOForms.sample}:
      entropy = -q.log_prob(z)
      entropy_terms.append(_reduce(entropy))
      if log_likelihood is not None:
        prior = p.log_prob(z)
        prior_terms.append(_reduce(prior))

  first_term = log_joint if log_joint is not None else log_likelihood
  return sum([first_term] + kl_terms + entropy_terms + prior_terms)
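A minimal numerical sketch (plain numpy with a made-up 1-D Gaussian, not part of the TensorFlow source) of why the three branches above agree in expectation: branch 1 uses the analytic KL(q||p), branch 2 the analytic entropy plus the prior log-probability, and branch 3 plain sampling, and all three yield the same correction to the log-likelihood term.

import numpy as np

rng = np.random.RandomState(0)
m, s = 0.7, 0.5                      # variational posterior q = N(m, s^2), prior p = N(0, 1)
z = m + s * rng.randn(200000)        # Monte Carlo samples from q

kl_analytic = np.log(1.0 / s) + (s ** 2 + m ** 2) / 2.0 - 0.5   # branch 1: KL(q || p)

entropy_q = 0.5 * np.log(2 * np.pi * np.e * s ** 2)             # H[q]
log_p_z = -0.5 * (np.log(2 * np.pi) + z ** 2)
kl_entropy_form = -(entropy_q + log_p_z.mean())                 # branch 2: -(H[q] + E[log p(z)])

log_q_z = -0.5 * (np.log(2 * np.pi * s ** 2) + ((z - m) / s) ** 2)
kl_sampled = (log_q_z - log_p_z).mean()                         # branch 3: E[log q(z) - log p(z)]

print(kl_analytic, kl_entropy_form, kl_sampled)  # all close to 0.563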
Code example #25
0
def _elbo(form, log_likelihood, log_joint, variational_with_prior,
          keep_batch_dim):
    """Internal implementation of ELBO. Users should use `elbo`.

    Args:
      form: ELBOForms constant. Controls how the ELBO is computed.
      log_likelihood: `Tensor` log p(x|Z).
      log_joint: `Tensor` log p(x, Z).
      variational_with_prior: `dict<StochasticTensor, Distribution>`, variational
        distributions to prior distributions.
      keep_batch_dim: bool. Whether to keep the batch dimension when reducing
        the entropy/KL.

    Returns:
      ELBO `Tensor` with same shape and dtype as `log_likelihood`/`log_joint`.
    """
    ELBOForms.check_form(form)

    # Order of preference
    # 1. Analytic KL: log_likelihood - KL(q||p)
    # 2. Analytic entropy: log_likelihood + log p(Z) + H[q], or log_joint + H[q]
    # 3. Sample: log_likelihood - (log q(Z) - log p(Z)) =
    #            log_likelihood + log p(Z) - log q(Z), or log_joint - log q(Z)

    def _reduce(val):
        if keep_batch_dim:
            return val
        else:
            return math_ops.reduce_sum(val)

    kl_terms = []
    entropy_terms = []
    prior_terms = []
    for q, z, p in [(qz.distribution, qz.value(), pz)
                    for qz, pz in variational_with_prior.items()]:
        # Analytic KL
        kl = None
        if log_joint is None and form in {
                ELBOForms.default, ELBOForms.analytic_kl
        }:
            try:
                kl = distributions.kl(q, p)
                logging.info("Using analytic KL between q:%s, p:%s", q, p)
            except NotImplementedError as e:
                if form == ELBOForms.analytic_kl:
                    raise e
        if kl is not None:
            kl_terms.append(-1. * _reduce(kl))
            continue

        # Analytic entropy
        entropy = None
        if form in {ELBOForms.default, ELBOForms.analytic_entropy}:
            try:
                entropy = q.entropy()
                logging.info("Using analytic entropy for q:%s", q)
            except NotImplementedError as e:
                if form == ELBOForms.analytic_entropy:
                    raise e
        if entropy is not None:
            entropy_terms.append(_reduce(entropy))
            if log_likelihood is not None:
                prior = p.log_prob(z)
                prior_terms.append(_reduce(prior))
            continue

        # Sample
        if form in {ELBOForms.default, ELBOForms.sample}:
            entropy = -q.log_prob(z)
            entropy_terms.append(_reduce(entropy))
            if log_likelihood is not None:
                prior = p.log_prob(z)
                prior_terms.append(_reduce(prior))

    first_term = log_joint if log_joint is not None else log_likelihood
    return sum([first_term] + kl_terms + entropy_terms + prior_terms)
Code example #26
0
File: e2c.py Project: ZhengYi0310/other_stuff
    def _create_vlb(self):
        if FLAGS.dynamics == False:
            # When there is no transition dynamics, the loss is composed of two terms:
            # 1.) The reconstruction loss (the negative log probability
            #     of the input under the reconstructed Gaussian distribution
            #     induced by the decoder in the data space).
            # Adding 1e-10 to avoid evaluation of log(0.0)
            # Q_phi = distributions.MultivariateNormalDiag(self.x_recons_mean, tf.sqrt(tf.exp(self.x_recons_logsigma_sq)))
            # Diagonal-Gaussian log-density:
            # -0.5 * (D*log(2*pi) + sum(log sigma^2) + sum((x - mu)^2 / sigma^2))
            self.log_prob_reconst = -0.5 * (
                self.input_x.get_shape()[1].value * np.log(2 * np.pi) +
                tf.reduce_sum(self.x_recons_logsigma_sq, axis=1) +
                tf.reduce_sum(tf.square(self.input_x - self.x_recons_mean) /
                              tf.exp(self.x_recons_logsigma_sq),
                              axis=1)) + 1e-5  # For numerical stability
            recon_loss = -self.log_prob_reconst
            reconstr_loss = \
                -tf.reduce_sum(self.input_x * tf.log(1e-9 + self.x_recons_mean)
                               + (1 - self.input_x) * tf.log(1e-9 + 1 - self.x_recons_mean),
                               1)
            # recon_loss = tf.reduce_sum(-tf.log(tf.reduce_sum(Q_phi.prob(self.input_x))))
            # recon_loss = -tf.reduce_sum(Q_phi.prob(tf.reshape(self.input_x, [-1, FLAGS.input_dim])))

            # 2.) The latent loss, which is defined as the Kullback Leibler divergence
            ##    between the distribution in latent space induced by the encoder on
            #     the data and some prior. This acts as a kind of regularizer.
            #     This can be interpreted as the number of "nats" required
            #     for transmitting the latent space distribution given
            #     the prior.
            # KL(q(z|x) || N(0, I)), with z_sample_logsigma_sq holding log(sigma^2)
            # as in the dynamics branch below
            latent_loss = -0.5 * tf.reduce_sum(
                1 + self.z_sample_logsigma_sq -
                tf.square(self.z_sample_mean) -
                tf.exp(self.z_sample_logsigma_sq), 1)
            self.cost = tf.reduce_mean(reconstr_loss +
                                       latent_loss)  # average over batch

        else:
            # When there is a transition dynamics model, the loss is composed of three or four terms:
            #See "Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images" by Manuel Watter, Martin Riedmiller et al. for more details
            #See "Stable Reinforcement Learning with Autoencoders for Tactile and Visual Data" by Herke Van Hoof, Patric Van Der Smagt, Jan Peters et al. for more details
            # 1.) The reconstruction loss of the state at the current time stamp (the negative log probability
            #     of the input under the reconstructed Gaussian distribution
            #     induced by the decoder in the data space).
            # Adding 1e-10 to avoid evaluation of log(0.0)
            # Q_eps = distributions.MultivariateNormalDiag(self.x_recons_mean, tf.sqrt(tf.exp(self.x_recons_logsigma_sq)))
            self.log_prob_reconst = -0.5 * (
                self.input_x.get_shape()[1].value * np.log(2 * np.pi) +
                tf.reduce_sum(self.x_recons_logsigma_sq, axis=1) +
                tf.reduce_sum(tf.square(self.input_x - self.x_recons_mean) /
                              tf.exp(self.x_recons_logsigma_sq),
                              axis=1)) + 1e-5  # For numerical stability
            recon_loss = -self.log_prob_reconst
            reconstr_loss = \
                -tf.reduce_sum(self.input_x * tf.log(1e-5 + self.x_recons_mean)
                               + (1 - self.input_x) * tf.log(1e-5 + 1 - self.x_recons_mean),
                               1)

            # 2.) The latent loss, which is defined as the Kullback Leibler divergence
            ##    between the distribution in latent space induced by the encoder on
            #     the data and some prior. This acts as a kind of regularizer.
            #     This can be interpreted as the number of "nats" required
            #     for transmitting the latent space distribution given
            #     the prior.
            latent_loss = -0.5 * tf.reduce_sum(
                1 + self.z_sample_logsigma_sq - tf.square(self.z_sample_mean) -
                tf.exp(self.z_sample_logsigma_sq), 1)

            # 3.) The reconstruction loss of state at the next time stamp (the negative log probability
            #     of the input under the reconstructed Gaussian distribution
            #     induced by the decoder in the data space).
            #     Adding 1e-10 to avoid evaluation of log(0.0)
            if FLAGS.deterministic_prediction == True:
                self.x_predict_mean, self.x_predict_logsigma_sq = self._decoder_network(
                    self.network_weights["weights_gener"],
                    self.network_weights["biases_gener"],
                    self.z_predict,
                    share=True)

                self.log_prob_reconst_next = -0.5 * (
                    self.input_x_next.get_shape()[1].value * np.log(2 * np.pi) +
                    tf.reduce_sum(self.x_predict_logsigma_sq, axis=1) +
                    tf.reduce_sum(
                        tf.square(self.input_x_next - self.x_predict_mean) /
                        tf.exp(self.x_predict_logsigma_sq),
                        axis=1)) + 1e-5  # For numerical stability
                # Q_eps_next = distributions.MultivariateNormalDiag(self.x_predict_mean, tf.sqrt(tf.exp(self.x_predict_logsigma_sq)))
                recon_loss -= self.log_prob_reconst_next

                reconstr_loss = \
                    -tf.reduce_sum(self.input_x_next * tf.log(1e-5 + self.x_predict_mean)
                                   + (1 - self.input_x_next) * tf.log(1e-5 + 1 - self.x_predict_mean),
                                   1)

            else:
                ########## Construct the transition dynamics distribution
                Q_psi_scale = tf.cholesky(
                    tf.matmul(
                        tf.matmul(self.W_z,
                                  tf.diag(tf.exp(self.z_sample_logsigma_sq))),
                        tf.transpose(self.W_z)) + tf.eye(FLAGS.latent_dim))
                Q_psi = distributions.MultivariateNormalCholesky(
                    self.z_predict, Q_psi_scale)
                self.z_predict_sample = Q_psi.sample()
                #########

                ########## The reconstruction loss of state at the next time stamp
                self.x_predict_mean_next, self.x_predict_logsigma_sq_next = self._decoder_network(
                    self.network_weights["weights_gener"],
                    self.network_weights["biases_gener"],
                    self.z_predict_sample,
                    share=True)
                Q_eps_next = distributions.MultivariateNormalDiag(
                    self.x_predict_mean_next,
                    tf.sqrt(tf.exp(self.x_predict_logsigma_sq_next)))
                # MultivariateNormalDiag.prob already yields one value per example,
                # so no further reduction is needed; the small constant guards log(0.0)
                recon_loss -= tf.log(
                    Q_eps_next.prob(
                        tf.reshape(self.input_x_next,
                                   [-1, FLAGS.input_dim])) + 1e-5)
                ##########

                if FLAGS.dyanmics_KL_constraint == True:
                    ########## KL diverngence between the transition dynamics distribution and the encoder net for x_t+1
                    self.z_sample_mean_next, self.z_sample_logsigma_sq_next = self._encoder_network(
                        self.network_weights["weights_recog"],
                        self.network_weights["biases_recog"],
                        self.input_x_next,
                        share=True)
                    Q_phi_next = distributions.MultivariateNormalDiag(
                        self.z_sample_mean_next,
                        tf.sqrt(tf.exp(self.z_sample_logsigma_sq_next)))
                    latent_loss += distributions.kl(Q_psi, Q_phi_next)
            self.cost = tf.reduce_mean(reconstr_loss +
                                       latent_loss)  # average over batch
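A hedged numpy cross-check of the latent_loss term used above (the encoder outputs are made up and this snippet is not part of e2c.py): for a diagonal Gaussian q = N(mu, diag(sigma^2)) parameterised by log-variances, KL(q || N(0, I)) = -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2), which is the expression _create_vlb reduces over the latent dimensions.

import numpy as np

mu = np.array([0.3, -1.2, 0.0])             # hypothetical encoder means
logsigma_sq = np.array([-0.5, 0.1, 0.0])    # hypothetical encoder log-variances
sigma_sq = np.exp(logsigma_sq)

kl_closed_form = -0.5 * np.sum(1 + logsigma_sq - mu ** 2 - sigma_sq)
kl_term_by_term = np.sum(0.5 * (sigma_sq + mu ** 2 - 1.0) - 0.5 * logsigma_sq)
print(kl_closed_form, kl_term_by_term)      # identical: the two expressions are algebraically equal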