Ejemplos de Normal en Python, ejemplos de tensorflow.distributions.Normal en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: forward.py Proyecto: dieterichlawson/survival

    def log_prob(self, zs, xs, fs, lens):
        """Score latents, observations and failure indicators per timestep.

        Args:
          zs: Latent states; the shape notes below treat them as
            [batch, time, state_size].
          xs: Observations, scored under normals centered at `zs`.
          fs: Failure indicators, scored under a Bernoulli whose logits are
            an affine function of `zs`.
          lens: Sequence lengths used to build the timestep mask.

        Returns:
          The three log-probability terms added together and summed over
          the last axis, with timesteps past `lens` multiplied by zero.
        """
        # Transition mean for step t is z_{t-1} + drift; the first step has
        # no predecessor, so its mean is the zero padding added in front.
        shifted = zs[:, :-1, :] + self.drift[tf.newaxis, tf.newaxis, :]
        transition_locs = tf.pad(shifted, [[0, 0], [1, 0], [0, 0]],
                                 mode="CONSTANT")
        transition_lp = tfd.Normal(loc=transition_locs,
                                   scale=self.z_scale).log_prob(zs)

        # Observations are normals centered at each z.
        emission_lp = tfd.Normal(loc=zs, scale=self.x_scale).log_prob(xs)

        # Failure logits: [batch, time, state_size] x [state_size, 1] gives
        # [batch, time, 1], which is then flattened to [batch, time].
        raw_logits = tf.einsum("ijk,kl->ijl", zs, self.W_f)
        raw_logits = raw_logits + self.b_f[tf.newaxis, tf.newaxis, :]
        logits_shape = tf.shape(raw_logits)
        fail_logits = tf.reshape(raw_logits,
                                 [logits_shape[0], logits_shape[1]])
        fail_logits = fail_logits * self.bern_temp
        failure_lp = tfd.Bernoulli(logits=fail_logits).log_prob(fs)

        # Broadcast the failure term over the state axis, then sum that axis.
        joint = transition_lp + emission_lp + failure_lp[:, :, tf.newaxis]
        total = tf.reduce_sum(joint, axis=-1)

        # Zero out timesteps past the end of each sequence.
        step_mask = tf.sequence_mask(lens, dtype=total.dtype)
        return total * step_mask

Ejemplo n.º 2

0

Mostrar archivo

Archivo: backward.py Proyecto: dieterichlawson/survival

    def log_prob(self, zs, xs, T, z_lens, x_lens):
        """Compute log probabilities of latents, observations and censoring.

        Args:
          zs: [batch_size, max_z_num_timesteps, state_dim] latent states.
          xs: [batch_size, max_x_num_timesteps, state_dim] observations.
          T: [batch_size] integers, the number of censored steps.
          z_lens: [batch_size] integers, the length of each z sequence.
          x_lens: [batch_size] integers, the length of each observation
            sequence. Note that T must equal z_lens - x_lens.

        Returns:
          log_p_z: [batch_size, max_z_num_timesteps] logprobs of zs.
          log_p_x_given_z: [batch_size, max_x_num_timesteps] logprobs of xs.
          log_p_T: [batch_size] logprobs of T.
        """
        # Work on the time-reversed latents.
        zs_reversed = tf.reverse_sequence(zs, z_lens, seq_axis=1, batch_axis=0)
        n_batch = tf.shape(zs)[0]

        # Mean of each reversed step is the previous reversed z plus drift;
        # the first reversed step uses z0_mu instead.
        drifted = zs_reversed[:, :-1, :] + self.drift[tf.newaxis, tf.newaxis, :]
        initial_loc = tf.tile(self.z0_mu[tf.newaxis, tf.newaxis, :],
                              [n_batch, 1, 1])
        locs_reversed = tf.concat([initial_loc, drifted], axis=1)

        # Score the reversed latents and zero the padded steps.
        z_dist = tfd.Normal(loc=locs_reversed, scale=self.z_scale)
        lp_z_reversed = z_dist.log_prob(zs_reversed)
        z_mask = tf.sequence_mask(z_lens, dtype=lp_z_reversed.dtype)
        lp_z_reversed = lp_z_reversed * z_mask[:, :, tf.newaxis]

        # Undo the reversal, then sum over the state dimension.
        log_p_z = tf.reverse_sequence(lp_z_reversed,
                                      z_lens,
                                      seq_axis=1,
                                      batch_axis=0)
        log_p_z = tf.reduce_sum(log_p_z, axis=-1)

        # For the observations, keep only the first x_len latents of each
        # sequence and truncate to the longest observation sequence.
        x_mask_over_z = tf.sequence_mask(x_lens,
                                         maxlen=tf.reduce_max(z_lens),
                                         dtype=zs.dtype)
        zs_for_x = zs * x_mask_over_z[:, :, tf.newaxis]
        zs_for_x = zs_for_x[:, :tf.reduce_max(x_lens), :]

        x_dist = tfd.Normal(loc=zs_for_x, scale=self.x_scale)
        log_p_x_given_z = x_dist.log_prob(xs)
        x_mask = tf.sequence_mask(x_lens, dtype=log_p_x_given_z.dtype)
        log_p_x_given_z = log_p_x_given_z * x_mask[:, :, tf.newaxis]
        log_p_x_given_z = tf.reduce_sum(log_p_x_given_z, axis=-1)

        # Censoring length is scored under a categorical over T_logits.
        log_p_T = tfd.Categorical(logits=self.T_logits).log_prob(T)
        return log_p_z, log_p_x_given_z, log_p_T

Ejemplo n.º 3

0

Mostrar archivo

Archivo: losses.py Proyecto: Kautenja/robust-graph-convolutional-networks-against-adversarial-attacks-implementation

def kl_reg(mean, variance, weight: float = 5e-4):
    """
    Return the weighted KL regularization term for mean/variance tensors.

    Args:
        mean: the mean output from the first layer
        variance: the variance output from the first layer
        weight: the multiplier applied to the divergence

    Returns:
        weight * KL(standard normal || Normal(mean, sqrt(variance)))
    """
    # Reference distribution: zero mean, unit scale, shaped like `mean`.
    standard_normal = distributions.Normal(K.zeros_like(mean),
                                           K.ones_like(mean))
    # Normal takes a standard deviation, hence the square root.
    learned = distributions.Normal(mean, K.sqrt(variance))
    divergence = distributions.kl_divergence(standard_normal, learned)
    return weight * divergence

Ejemplo n.º 4

0

Mostrar archivo

Archivo: activations.py Proyecto: liminghu/deep-ordinal-clm

 def __init__(self, num_classes, link_function, p, use_tau, **kwargs):
     """Store the layer configuration and build a standard normal.

     Args:
       num_classes: Stored as `self.num_classes`.
       link_function: Stored as `self.link_function`.
       p: Object with a `copy()` method; the copy is stored as `self.p`
         so the caller's object is not shared.
       use_tau: Stored as `self.use_tau`.
       **kwargs: Forwarded to the parent class constructor.
     """
     self.num_classes = num_classes
     self.link_function = link_function
     self.use_tau = use_tau
     self.p = p.copy()
     # Standard normal kept on the instance as `self.dist`.
     self.dist = distributions.Normal(loc=0., scale=1.)
     super(CLM, self).__init__(**kwargs)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: ggc.py Proyecto: Kautenja/robust-graph-convolutional-networks-against-adversarial-attacks-implementation

    def call(self, inputs, **kwargs):
        """
        Run the layer on its inputs.

        Args:
            inputs: the input tensors to pass through the layer

        Returns:
            `[mean, variance]` for every layer except the last; for the
            last layer, `last_activation` applied to a sample drawn from
            Normal(mean, sqrt(variance))

        """
        # The first layer converts vectors to distributions; later layers
        # transform the incoming distributions.
        transform = self._call_first if self.is_first else self._call_generic
        mean, variance = transform(inputs)

        # Activations are applied separately to the mean and the variance.
        mean = self.mean_activation(mean)
        variance = self.variance_activation(variance)

        # Dropout is skipped when `self.dropout` is falsy.
        if self.dropout:
            mean = K.dropout(mean, self.dropout)
            variance = K.dropout(variance, self.dropout)

        if not self.is_last:
            return [mean, variance]

        # Last layer: sample from the distribution and activate the sample.
        output_dist = distributions.Normal(mean, K.sqrt(variance))
        return self.last_activation(output_dist.sample())

Ejemplo n.º 6

0

Mostrar archivo

Archivo: losses.py Proyecto: rgcl/astroNN

def robust_binary_crossentropy(y_true, y_pred, logit_var):
    """
    Calculate a robust binary cross-entropy loss from logits and their
    predictive variance; the result is scaled by
    ``magic_correction_term(y_true)``

    :param y_true: Ground Truth
    :type y_true: Union(tf.Tensor, tf.Variable)
    :param y_pred: Prediction in logits space
    :type y_pred: Union(tf.Tensor, tf.Variable)
    :param logit_var: Predictive variance in logits space
    :type logit_var: Union(tf.Tensor, tf.Variable)
    :return: robust binary cross-entropy
    :rtype: tf.Tensor
    :History: 2018-Mar-15 - Written - Henry Leung (University of Toronto)
    """
    # Penalty term: mean of exp(logit_var) - 1.
    variance_depressor = tf.reduce_mean(
        tf.exp(logit_var) - tf.ones_like(logit_var))
    # Plain cross-entropy on the undistorted logits.
    undistorted_loss = binary_crossentropy(y_true, y_pred, from_logits=True)
    # NOTE(review): `logit_var` is passed directly as the Normal scale.
    dist = distributions.Normal(loc=y_pred, scale=logit_var)

    def _distortion(sampled_logits):
        # Negative ELU of how much the sampled logits change the loss.
        distorted_loss = binary_crossentropy(y_true,
                                             sampled_logits,
                                             from_logits=True)
        return -tf.nn.elu(undistorted_loss - distorted_loss)

    # Monte Carlo estimate over 25 samples of the logits.
    logit_samples = dist.sample([25])
    mc_result = tf.map_fn(_distortion, logit_samples, dtype=tf.float32)

    variance_loss = tf.reduce_mean(mc_result, axis=0) * undistorted_loss

    combined = variance_loss + undistorted_loss + variance_depressor
    return combined * magic_correction_term(y_true)

Ejemplo n.º 7

0

Mostrar archivo

def decoder2(z, opt, reuse=False):
    """Build the decoder network and return a unit-scale Normal.

    Args:
        z: input tensor fed to the first dense layer.
        opt: options object; reads `inflate_to_size1`, `inflate_to_size2`,
            `dropout_rate` and `gex_size`.
        reuse: forwarded to the "decoder2" variable scope.

    Returns:
        `ds.Normal` whose loc is the final dense output and whose scale is
        a tensor of ones with the same shape.
    """

    def _hidden_block(inputs, width, layer_name):
        # dense -> batch norm -> leaky ReLU -> dropout.
        # NOTE(review): batch_normalization and dropout are called without
        # a `training` argument; confirm that is intended.
        out = tf.layers.dense(
            inputs=inputs,
            units=width,
            activation=None,
            name=layer_name,
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        out = tf.layers.batch_normalization(out)
        out = tf.nn.leaky_relu(out)
        return tf.layers.dropout(out, opt.dropout_rate)

    with tf.compat.v1.variable_scope("decoder2", reuse=reuse):
        hidden = _hidden_block(z, opt.inflate_to_size1, "decoder_dense1")
        hidden = _hidden_block(hidden, opt.inflate_to_size2, "decoder_dense2")

        # Linear output layer provides the mean of the distribution.
        de_loc = tf.layers.dense(inputs=hidden,
                                 units=opt.gex_size,
                                 activation=None,
                                 name="decoder_loc")

        return ds.Normal(de_loc, tf.ones_like(de_loc))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: backward.py Proyecto: dieterichlawson/survival

        def while_step(t, prev_z, rev_log_q_z_ta, rev_zs_ta):
            """Sample one latent step and record it with its log density.

            Args:
              t: Current loop index.
              prev_z: Latent sampled on the previous iteration.
              rev_log_q_z_ta: TensorArray collecting per-step log densities.
              rev_zs_ta: TensorArray collecting per-step samples.

            Returns:
              (t + 1, new sample, updated log-density array, updated
              sample array).
            """
            # [batch_size] number of steps until the next x, floored at 0.
            censored_left = tf.maximum(T - t, 0)

            # Index of the next x, clamped to [0, x_lens - 1], then gathered
            # per batch row.
            x_index = tf.minimum(tf.maximum(t - T, 0), x_lens - 1)
            batch_index = tf.range(0, batch_size)
            gather_inds = tf.stack([batch_index, x_index], axis=-1)
            x = tf.gather_nd(rev_xs, gather_inds)

            # Mean is an affine function of [x, prev_z, steps_till_next_x].
            features = tf.concat(
                [x, prev_z,
                 tf.to_float(censored_left)[:, tf.newaxis]],
                axis=1)
            z_loc = tf.matmul(features, self.W_z) + self.b_z[tf.newaxis, :]

            # Scale is looked up by steps remaining, softplus-ed and floored
            # at sigma_min.
            step_log_sigmas = tf.gather(self.log_sigma, censored_left)
            z_scale = tf.math.maximum(tf.math.softplus(step_log_sigmas),
                                      self.sigma_min)

            proposal = tfd.Normal(loc=z_loc, scale=z_scale)
            sampled_z = proposal.sample()
            sampled_lp = proposal.log_prob(sampled_z)

            # Sequences that have already ended contribute zeros.
            active = t < z_lens
            sampled_z = tf.where(active, sampled_z, tf.zeros_like(sampled_z))
            sampled_lp = tf.where(active, sampled_lp,
                                  tf.zeros_like(sampled_lp))

            updated_lp_ta = rev_log_q_z_ta.write(t, sampled_lp)
            updated_zs_ta = rev_zs_ta.write(t, sampled_z)
            return t + 1, sampled_z, updated_lp_ta, updated_zs_ta

Ejemplo n.º 9

0

Mostrar archivo

def tf_standardGaussian_prior(batch_size, dim):
    """Return a standard Gaussian with parameters of shape [batch_size, dim].

    Args:
        batch_size: first dimension of the loc/scale tensors.
        dim: second dimension of the loc/scale tensors.

    Returns:
        `ds.Normal` with all-zero loc and all-one scale.
    """
    param_shape = [batch_size, dim]
    return ds.Normal(tf.zeros(param_shape), tf.ones(param_shape))

Ejemplo n.º 10

0

Mostrar archivo

Archivo: forward.py Proyecto: dieterichlawson/survival

    def sample(self, batch_size, z0=None, max_length=50):
        """Sample latent trajectories, failure indicators and observations.

        Each step draws a new latent around `prev_z + drift`, then draws a
        Bernoulli failure indicator from an affine function of that latent.
        The loop stops once every batch element has failed or `max_length`
        steps have been simulated.

        Args:
          batch_size: Number of trajectories sampled in parallel.
          z0: Optional state preceding the first sampled step. When None it
            is set to `-drift`, so the first step's mean is zero.
          max_length: Maximum number of steps to simulate. Previously this
            argument was ignored and the limit was hard-coded to 50; the
            default keeps that behavior.

        Returns:
          zs: Stacked latent samples, one entry per simulated step (step
            axis first, as produced by `TensorArray.stack`); zero for
            elements that had failed before the step.
          xs: Observations sampled from normals centered at `zs`.
          fs: Stacked failure indicators, zeroed the same way as `zs`.
          lens: [batch_size] int32 counter that starts at 1 and is
            incremented after every step on which the element has not
            failed.
        """
        zs_ta = tf.TensorArray(dtype=self.dtype,
                               size=5,
                               dynamic_size=True,
                               name="sample_zs")
        fs_ta = tf.TensorArray(dtype=tf.int32,
                               size=5,
                               dynamic_size=True,
                               name="sample_fs")

        t0 = tf.constant(0)
        failed = tf.zeros([batch_size], dtype=tf.bool)
        lens = tf.ones([batch_size], dtype=tf.int32)
        if z0 is None:
            # Start at -drift so that the first z_loc (prev_z + drift) is 0.
            z0 = tf.zeros([batch_size, self.state_size],
                          dtype=self.dtype) - self.drift[tf.newaxis, :]

        def while_predicate(t, failed, *unused_args):
            # Continue while any element is still alive and the step budget
            # given by `max_length` has not been used up.
            return tf.math.logical_and(
                tf.math.reduce_any(tf.math.logical_not(failed)),
                t < max_length)

        def while_step(t, failed, lens, prev_z, zs_ta, fs_ta):
            # z_loc is [batch_size, state_size]
            z_loc = prev_z + self.drift[tf.newaxis, :]
            # new_zs is [batch_size, state_size]
            new_zs = tfd.Normal(loc=z_loc, scale=self.z_scale).sample()
            # multiply [batch_size, state_size] new_zs by [state_size, 1] W_f
            # then add [:, 1] b_f.
            bern_logits = tf.matmul(new_zs, self.W_f) + self.b_f[tf.newaxis, :]
            bern_logits = tf.reshape(bern_logits, [batch_size])
            bern_logits *= self.bern_temp
            # Sample a [batch_size] set of failure indicators
            new_fs = tfd.Bernoulli(logits=bern_logits).sample()
            # Update TensorArrays; already-failed elements record zeros.
            new_zs_ta = zs_ta.write(
                t, tf.where(failed, tf.zeros_like(new_zs), new_zs))
            new_fs_ta = fs_ta.write(
                t, tf.where(failed, tf.zeros_like(new_fs), new_fs))
            # Update failure indicators
            new_failed = tf.logical_or(failed, tf.equal(new_fs, 1))
            # Update lengths (add one only if the process hasn't failed)
            new_lens = lens + (1 - tf.to_int32(new_failed))
            return t + 1, new_failed, new_lens, new_zs, new_zs_ta, new_fs_ta

        _, _, lens, _, zs_ta, fs_ta = tf.while_loop(while_predicate,
                                                    while_step,
                                                    loop_vars=(t0, failed,
                                                               lens, z0, zs_ta,
                                                               fs_ta),
                                                    parallel_iterations=1)

        zs = zs_ta.stack()
        fs = fs_ta.stack()
        xs = tfd.Normal(loc=zs, scale=self.x_scale).sample()

        return zs, xs, fs, lens

Ejemplo n.º 11

0

Mostrar archivo

Archivo: lkj.py Proyecto: xiaofengzhiyu/probability

def _uniform_unit_norm(dimension, shape, dtype, seed):
    """Draw a batch of points uniformly from the unit hypersphere.

    Args:
      dimension: Size of the last axis of the returned points.
      shape: Leading shape of the batch; `[dimension]` is appended to it.
      dtype: Dtype whose `as_numpy_dtype` is used for the Normal parameters.
      seed: Zero-argument callable returning the seed for sampling.

    Returns:
      Tensor of shape `shape + [dimension]` with unit L2 norm on the last
      axis.
    """
    # Normalizing Gaussian draws works because the Gaussian distribution is
    # spherically symmetric.
    to_numpy = dtype.as_numpy_dtype
    standard_normal = tfd.Normal(loc=to_numpy(0.), scale=to_numpy(1.))
    # raw shape: shape + [dimension]
    sample_shape = tf.concat([shape, [dimension]], axis=0)
    raw = standard_normal.sample(sample_shape, seed=seed())
    lengths = tf.norm(raw, ord=2, axis=-1)
    return raw / lengths[..., tf.newaxis]

Ejemplo n.º 12

0

Mostrar archivo

    def logposterior(self, params_log):
        """Evaluate the log posterior at log-scale parameters.

        :params_log: tensor indexed as `[0, 0]` (log of sigma2) and
            `[1, 0]` (log of phi)
        :returns: `dmvnorm` of `self.tf_y` under a zero mean and the
            assembled covariance, plus a Normal log-prior on each
            log-parameter

        """
        mean_vector = tf.zeros([self.n, 1])
        log_sigma2 = params_log[0, 0]
        log_phi = params_log[1, 0]
        sigma2 = tf.exp(log_sigma2)
        phi = tf.exp(log_phi)

        # Covariance: exponential-covariance term + prior_c_sigma2 + identity.
        gp_cov = tf_cov_exp(self.tf_dis, sigma2, phi, 0.0)
        marginal_cov = self.prior_c_sigma2 + gp_cov
        full_cov = marginal_cov + tf.eye(self.n)

        log_likelihood = dmvnorm(self.tf_y, mean_vector, full_cov)
        # Normal priors on the log-parameters, centered at log(1.0) and
        # log(0.1) with scale 0.4.
        sigma2_log_prior = distr.Normal(tf.log(1.0), 0.4).log_prob(log_sigma2)
        phi_log_prior = distr.Normal(tf.log(0.1), 0.4).log_prob(log_phi)
        return log_likelihood + sigma2_log_prior + phi_log_prior

Ejemplo n.º 13

0

Mostrar archivo

def make_entity_bias(entity_batch):
    """Build the Normal named 'bias_posterior' for a batch of entities.

    Column 0 of the looked-up `bias` rows is the loc. The scale is 0 when
    `options.degenerate` is set, otherwise softplus of column 1.
    """
    rows = tf.nn.embedding_lookup(bias, entity_batch)
    # Alternatives noted by the original author: 1/sqrt(prior precision)
    # (would need clipping) and a constant 1 (too imprecise).
    std_dev = 0. if options.degenerate else tf.nn.softplus(rows[:, 1])
    return tfd.Normal(loc=rows[:, 0],
                      scale=std_dev,
                      name='bias_posterior')

Ejemplo n.º 14

0

Mostrar archivo

 def __init__(self, mean):
     """Gaussian with negative squared error as log probability.

     The log_prob() method computes the sum of the element-wise squared
     distances. This means that its value is both unnormalized and does not
     depend on the standard deviation.

     Args:
       mean: Mean of the distribution; also stored as `self._mean`. The
         wrapped Normal is always built with a scale of 1.0.
     """
     self._mean = mean
     self._dist = tfd.Normal(mean, 1.0)

Ejemplo n.º 15

0

Mostrar archivo

 def mh_r(x0, x1):  # log P(x1)g(x0|x1)
     """Return log g(x0 | x1) minus the energy at x1.

     The proposal g is a Normal centered at a gradient-informed step away
     from x1, with scale sqrt(2 * stepsz * inverse metric diagonal).
     """
     energy_x1 = transformed_energy(x1, energ_emb, m)[:, 0]
     inv_metric_x1 = 1. / m.metric_diag(x1)
     energy_grad_x1 = tf.gradients(energy_x1, [x1])[0]

     # Step size broadcast across the last axis.
     step = stepsz[:, None]
     proposal_mean = x1 + step * (-inv_metric_x1 * energy_grad_x1 +
                                  m.invdiag_grad(x1))
     proposal_scale = tf.sqrt(2 * step * inv_metric_x1)

     # Sum the per-coordinate log densities over the last axis.
     log_g = tfd.Normal(loc=proposal_mean, scale=proposal_scale).log_prob(x0)
     log_g = tf.reduce_sum(log_g, axis=-1)
     return log_g - energy_x1

Ejemplo n.º 16

0

Mostrar archivo

Archivo: prior.py Proyecto: zphilip/auto-encoding-variational-bayes

def prior(latent_size):
    """Build a standard normal prior with `latent_size` components.

    Args:
        latent_size (int): The dimension of the latent space.

    Returns:
        tf.distributions.Normal: Normal with zero loc and unit scale, both
            of shape [latent_size].
    """
    param_shape = [latent_size]
    return ds.Normal(tf.zeros(param_shape), tf.ones(param_shape))

Ejemplo n.º 17

0

Mostrar archivo

Archivo: decoder.py Proyecto: zphilip/auto-encoding-variational-bayes

def decoder(latent, img_size, units):
    """Decoder builds a decoder network on the given latent variable tensor.

    Args:
        latent (tf.Tensor): sample_size x batch_size x latent_size latent
            tensor.
        img_size (int): Number of output units for both loc and scale.
        units (int): Number of units in the hidden dense layer.

    Returns:
        (tf.distribution.Normal): The batch_shape = (sample x batch x img)
            normal distributions representing the sampled img likelihoods.
    """
    hidden = tf.layers.dense(latent, units)

    loc = tf.layers.dense(hidden, img_size)
    # A Normal scale must be positive. The raw dense output is unconstrained
    # and can be zero or negative, which makes log_prob NaN. softplus is
    # applied outside the layer so the layer's variables are unchanged.
    scale = tf.nn.softplus(tf.layers.dense(hidden, img_size))
    return ds.Normal(loc, scale)

Ejemplo n.º 18

0

Mostrar archivo

def encoder(img, latent_size, units):
    """Encoder builds an encoder network against the provided image tensor.

    Args:
        img (tf.Tensor): batch_size x img_size tensor of flat images.
        latent_size (int): Number of output units for both loc and scale.
        units (int): Number of units in the hidden dense layer.

    Returns:
        (tf.distribution.Normal): The batch_shape = (batch_size, latent_size)
            batch of posterior normal distributions.
    """
    hidden = tf.layers.dense(img, units)

    loc = tf.layers.dense(hidden, latent_size)
    # A Normal scale must be positive. The raw dense output is unconstrained
    # and can be zero or negative, which makes log_prob NaN. softplus is
    # applied outside the layer so the layer's variables are unchanged.
    scale = tf.nn.softplus(tf.layers.dense(hidden, latent_size))
    return ds.Normal(loc, scale)

Ejemplo n.º 19

0

Mostrar archivo

def make_sparse_pred_reg(sigma2, x):
    """Build the prediction Normal for a sparse design matrix `x`.

    The loc is a linear term plus half of the factorization-machine style
    pairwise term, computed from the summed `all_bias` and `all_feat`
    tensors. `sigma2` is passed as the Normal scale.
    """
    summed_bias = tf.reduce_sum(all_bias, axis=0)
    summed_feat = tf.reduce_sum(all_feat, axis=0)
    # Column vector so that the sparse matmul accepts it.
    weights = tf.reshape(summed_bias, (-1, 1))
    feat_squared = summed_feat**2

    linear_term = tf.squeeze(tf.sparse_tensor_dense_matmul(x, weights))
    # FIXME (from the original author): x is used in place of x ** 2, which
    # is only okay if the entries of x are 0/1.
    projected_sq = tf.sparse_tensor_dense_matmul(x, summed_feat)**2
    squared_proj = tf.sparse_tensor_dense_matmul(x, feat_squared)
    pairwise_term = 0.5 * tf.reduce_sum(projected_sq - squared_proj, axis=1)

    logits = linear_term + pairwise_term
    return tfd.Normal(logits, scale=sigma2)

Ejemplo n.º 20

0

Mostrar archivo

    def update_mala(self, Sigma_proposal, h):
        """Build the ops for one gradient-informed (MALA-style) update.

        A candidate is proposed from the current `self.params_log` using the
        stored gradient `self.params_logpost_grad` and Gaussian noise, and
        is accepted when the log acceptance ratio exceeds the log of a
        uniform draw.

        :Sigma_proposal: covariance matrix of the proposal distribution;
        it is multiplied by `h` before use
        :h: scalar multiplier applied to `Sigma_proposal`
        :returns: three `tf.assign` ops that write the selected state into
        `self.params_log`, `self.params_logpost` and
        `self.params_logpost_grad`

        """
        zeros_n = tf.zeros([self.n, 1])  # NOTE(review): not used below.
        dims = tf.shape(self.params_log)

        # Scaled proposal covariance, its Cholesky factor and the inverse
        # of that factor (used to whiten the residuals further down).
        Sigma_proposal = h * Sigma_proposal
        L_proposal = tf.cholesky(Sigma_proposal)
        L_inv = tf.matrix_inverse(L_proposal)

        # candidate = current + 0.5 * Sigma * grad + L * standard normal.
        candidate = self.params_log
        candidate += 0.5 * tf.matmul(Sigma_proposal, self.params_logpost_grad)
        candidate += tf.matmul(L_proposal, distr.Normal(0.0, 1.0).sample(dims))
        cand_logpost = self.logposterior(candidate)
        cand_logpost_grad = tf.gradients(cand_logpost, candidate)[0]
        # cand_logpost_grad = tf.gradients(self.logposterior(candidate), candidate)[0]

        # Residuals of the reverse move (candidate -> current) and of the
        # forward move (current -> candidate) about their proposal means.
        center_current =  self.params_log - candidate - \
                0.5 * tf.matmul(Sigma_proposal, cand_logpost_grad)
        center_cand =  candidate - self.params_log - \
                0.5 * tf.matmul(Sigma_proposal, self.params_logpost_grad)

        # Log acceptance ratio: posterior difference, plus the reverse
        # proposal's Gaussian exponent, minus the forward proposal's.
        logprob = cand_logpost - self.params_logpost
        logprob -= 0.5 * tf.reduce_sum(
            tf.square(tf.matmul(L_inv, center_current)))
        logprob += 0.5 * tf.reduce_sum(tf.square(tf.matmul(L_inv,
                                                           center_cand)))

        # Accept when logprob > log(u), with u drawn from distr.Uniform().
        log_unif = tf.log(distr.Uniform().sample())
        new, new_logpost, new_logpost_grad = tf.cond(
            tf.greater(logprob, log_unif), lambda:
            (candidate, cand_logpost, cand_logpost_grad), lambda:
            (self.params_log, self.params_logpost, self.params_logpost_grad))

        op_param = tf.assign(self.params_log, new)
        op_logpost = tf.assign(self.params_logpost, new_logpost)
        op_logpost_grad = tf.assign(self.params_logpost_grad, new_logpost_grad)
        return op_param, op_logpost, op_logpost_grad

Ejemplo n.º 21

0

Mostrar archivo

def calculate_probability(training_results, number_labels, latent_dim,
                          stddv_datapoints, X_test):
    """Score every test point under one Normal per label.

    For each label index the second element of `training_results[i]`
    supplies `z_mean` and `w_mean`. The averaged latent is projected through
    `w_mean` to give the loc of a Normal with scale `stddv_datapoints`, and
    each test point is scored by its mean log probability.

    Returns:
        numpy array of the per-label score lists, transposed.
    """
    per_label_scores = []
    for label in range(number_labels):
        fitted = training_results[label][1]

        # Column means of the transposed z_mean, as a (1, latent_dim) row.
        column_means = list(pd.DataFrame(fitted.z_mean.transpose()).mean())
        z_av = np.array(column_means, dtype="float32")
        z_av.shape = (1, latent_dim)

        x_dist = tfd.Normal(
            loc=tf.matmul(z_av, fitted.w_mean.transpose()),
            scale=stddv_datapoints * tf.ones([1, fitted.w_mean.shape[0]]),
            name="x_experiment{}".format(label))

        # .eval() relies on a default session being active.
        scores = [
            tf.reduce_mean(x_dist.log_prob(testpoint)).eval()
            for testpoint in X_test
        ]
        per_label_scores.append(scores)
    return np.array(per_label_scores).transpose()

Ejemplo n.º 22

0

Mostrar archivo

Archivo: forward.py Proyecto: dieterichlawson/survival

 def while_step(t, prev_z, log_q_z_ta, zs_ta):
     """Sample one proposal step and record the sample and its log density.

     Returns:
       (t + 1, new sample, updated log-density array, updated sample array).
     """
     current_x = xs_ta.read(t)
     # Previous z and current x are both [batch, state_size]; join them
     # along the state dimension.
     features = tf.concat([prev_z, current_x], 1)
     # Affine map of the features gives the mean vector.
     q_loc = tf.matmul(features, self.W_mu) + self.b_mu[tf.newaxis, :]
     # Scale is the softplus of the parameters, floored at sigma_min.
     q_scale = tf.math.maximum(tf.math.softplus(self.log_sigma),
                               self.sigma_min)
     proposal = tfd.Normal(loc=q_loc, scale=q_scale)
     sampled_z = proposal.sample()
     # Steps at or past the sequence length are recorded as zeros.
     masked_z = tf.where(t < lens, sampled_z, tf.zeros_like(sampled_z))
     updated_zs_ta = zs_ta.write(t, masked_z)
     sampled_lp = proposal.log_prob(sampled_z)
     masked_lp = tf.where(t < lens, sampled_lp, tf.zeros_like(sampled_lp))
     updated_lp_ta = log_q_z_ta.write(t, masked_lp)
     return (t + 1, sampled_z, updated_lp_ta, updated_zs_ta)

Ejemplo n.º 23

0

Mostrar archivo

def MutualInformationLowerBound(c_rec, c_sample, opt):
    """Compute the mutual information lower bound for InfoGANs.

    The first `opt.code_size` columns of `c_rec` are the predicted code
    means. The standard deviations are ones when `opt.InfoGAN_fix_std` is
    set, otherwise softplus of the next `opt.code_size` columns.

    Returns:
        Mean over the batch of the per-example log likelihood of the
        sampled code (summed over axis 1).
    """
    predicted_mean = c_rec[:, :opt.code_size]
    sampled_code = c_sample[:, :opt.code_size]

    if opt.InfoGAN_fix_std:
        predicted_std = tf.ones_like(predicted_mean)
    else:
        raw_std = c_rec[:, opt.code_size:2 * opt.code_size]
        predicted_std = tf.nn.softplus(raw_std)

    code_dist = ds.Normal(predicted_mean, predicted_std)
    per_example_ll = tf.reduce_sum(code_dist.log_prob(sampled_code), [1])
    return tf.reduce_mean(per_example_ll)

Ejemplo n.º 24

0

Mostrar archivo

Archivo: forward.py Proyecto: dieterichlawson/survival

 def while_step(t, failed, lens, prev_z, zs_ta, fs_ta):
     """Advance the sampler by one step.

     Returns:
       (t + 1, updated failure flags, updated lengths, new latents, updated
       latent array, updated failure-indicator array).
     """
     # Latent mean is the previous latent plus drift: [batch_size, state_size].
     step_loc = prev_z + self.drift[tf.newaxis, :]
     sampled_zs = tfd.Normal(loc=step_loc, scale=self.z_scale).sample()

     # [batch_size, state_size] x [state_size, 1] W_f, plus b_f, flattened
     # to [batch_size] and multiplied by the temperature.
     fail_logits = tf.matmul(sampled_zs, self.W_f) + self.b_f[tf.newaxis, :]
     fail_logits = tf.reshape(fail_logits, [batch_size])
     fail_logits = fail_logits * self.bern_temp

     # One failure indicator per batch element.
     sampled_fs = tfd.Bernoulli(logits=fail_logits).sample()

     # Elements that had already failed record zeros.
     recorded_zs = tf.where(failed, tf.zeros_like(sampled_zs), sampled_zs)
     updated_zs_ta = zs_ta.write(t, recorded_zs)
     recorded_fs = tf.where(failed, tf.zeros_like(sampled_fs), sampled_fs)
     updated_fs_ta = fs_ta.write(t, recorded_fs)

     # An element stays failed once it has failed.
     now_failed = tf.logical_or(failed, tf.equal(sampled_fs, 1))
     # Lengths grow by one only for elements that have not failed.
     updated_lens = lens + (1 - tf.to_int32(now_failed))
     return (t + 1, now_failed, updated_lens, sampled_zs, updated_zs_ta,
             updated_fs_ta)

Ejemplo n.º 25

0

Mostrar archivo

    def update_rwmh(self, L_proposal):
        """Build the ops for one random-walk Metropolis-Hastings update.

        A candidate is proposed by adding `L_proposal` times standard
        normal noise to `self.params_log`, and is accepted when the
        log-posterior difference exceeds the log of a uniform draw.

        :L_proposal: Lower triangular Cholesky decomposition of the
        covariance of the proposal distribution
        :returns: two `tf.assign` ops that write the selected state into
        `self.params_log` and `self.params_logpost`

        """
        zeros_n = tf.zeros([self.n, 1])  # NOTE(review): not used below.
        dims = tf.shape(self.params_log)

        # candidate = current + L * standard normal noise.
        candidate = self.params_log
        candidate += tf.matmul(L_proposal, distr.Normal(0.0, 1.0).sample(dims))
        cand_logpost = self.logposterior(candidate)
        # The proposal is symmetric, so the log acceptance ratio is just
        # the difference in log posterior.
        logprob = cand_logpost - self.params_logpost

        # Accept when logprob > log(u), with u drawn from distr.Uniform().
        log_unif = tf.log(distr.Uniform().sample())
        new, new_logpost = tf.cond(
            tf.greater(logprob, log_unif), lambda: (candidate, cand_logpost),
            lambda: (self.params_log, self.params_logpost))

        op_param = tf.assign(self.params_log, new)
        op_logpost = tf.assign(self.params_logpost, new_logpost)
        return op_param, op_logpost

Ejemplo n.º 26

0

Mostrar archivo

Archivo: activations.py Proyecto: liminghu/deep-ordinal-clm

    def _nnpom(self, projected, thresholds):
        """Turn projections and thresholds into per-class probabilities.

        The projection is flattened (and divided by `self.tau` when
        `self.use_tau == 1`). Every threshold minus every projection is
        passed through the link function selected by `self.link_function`
        to give cumulative probabilities, which are then differenced into
        per-class probabilities.

        Args:
          projected: Tensor of projections; flattened to one value per
            sample.
          thresholds: Tensor of thresholds, tiled once per sample.

        Returns:
          Tensor with one row per sample: the first cumulative probability,
          then the successive differences, ending with one minus the last
          cumulative probability.
        """
        if self.use_tau == 1:
            projected = K.reshape(projected, shape=[-1]) / self.tau
        else:
            projected = K.reshape(projected, shape=[-1])

        # Build z3[i, j] = thresholds[j] - projected[i].
        m = K.shape(projected)[0]
        a = K.reshape(K.tile(thresholds, [m]), shape=[m, -1])
        b = K.transpose(
            K.reshape(K.tile(projected, [self.num_classes - 1]),
                      shape=[-1, m]))
        z3 = a - b

        if self.link_function == 'probit':
            a3T = self.dist.cdf(z3)
        elif self.link_function == 'cloglog':
            a3T = 1 - K.exp(-K.exp(z3))
        elif self.link_function == 'glogit':
            a3T = 1.0 / K.pow(1.0 + K.exp(-self.lmbd *
                                          (z3 - self.mu)), self.alpha)
        elif self.link_function == 'cauchit':
            # Standard Cauchy CDF: atan(z) / pi + 0.5. The previous form,
            # atan(z / pi) + 0.5, ranges over roughly (-1.07, 2.07) and is
            # therefore not a valid cumulative probability.
            a3T = K.atan(z3) / math.pi + 0.5
        elif self.link_function == 'lgamma':
            # q < 0 uses igammac, q > 0 uses igamma, q == 0 falls back to
            # the normal CDF.
            a3T = K.cond(
                self.q < 0,
                lambda: igammac(K.pow(self.q, -2),
                                K.pow(self.q, -2) * K.exp(self.q * z3)),
                lambda: K.cond(
                    self.q > 0,
                    lambda: igamma(K.pow(self.q, -2),
                                   K.pow(self.q, -2) * K.exp(self.q * z3)),
                    lambda: self.dist.cdf(z3)))
        elif self.link_function == 'gauss':
            a3T = 1.0 / 2.0 + K.tanh(z3 - self.p['mu']) * igamma(
                1.0 / self.p['alpha'],
                K.pow(K.pow((z3 - self.p['mu']) / self.p['r'], 2),
                      self.p['alpha'])) / (
                          2 * K.exp(lgamma(1.0 / self.p['alpha'])))
        elif self.link_function == 'expgauss':
            u = self.lmbd * (z3 - self.mu)
            v = self.lmbd * self.sigma
            dist1 = distributions.Normal(loc=0., scale=v)
            # NOTE(review): the usual exponentially modified Gaussian CDF
            # uses a normal with mean v**2 and standard deviation v here;
            # loc and scale look swapped. Left unchanged, please confirm.
            dist2 = distributions.Normal(loc=v, scale=K.pow(v, 2))
            a3T = dist1.cdf(u) - K.exp(-u + K.pow(v, 2) / 2 +
                                       K.log(dist2.cdf(u)))
        elif self.link_function == 'ggamma':
            a3T = igamma(self.p['d'] / self.p['p'],
                         K.pow((z3 / self.p['a']), self.p['p'])) / K.exp(
                             lgamma(self.p['d'] / self.p['p']))
        else:
            # Default link: logistic.
            a3T = 1.0 / (1.0 + K.exp(-z3))

        # Append a column of ones (cumulative probability of the last
        # class), then difference adjacent columns to get class
        # probabilities.
        a3 = K.concatenate([a3T, K.ones([m, 1])], axis=1)
        a3 = K.concatenate(
            [K.reshape(a3[:, 0], shape=[-1, 1]), a3[:, 1:] - a3[:, 0:-1]],
            axis=-1)

        return a3

Ejemplo n.º 27

0

Mostrar archivo

def make_likelihood_reg(sigma2, feat_users, feat_items, bias_users,
                        bias_items):
    """Build the Normal named 'pred' for user/item pairs.

    The loc is `global_bias` plus the user-item feature dot product (sum
    over axis 1) plus both biases. `sigma2` is passed as the Normal scale.
    """
    interaction = tf.reduce_sum(feat_users * feat_items, 1)
    logits = global_bias + interaction + bias_users + bias_items
    return tfd.Normal(logits, scale=sigma2, name='pred')

Ejemplo n.º 28

0

Mostrar archivo

Archivo: test.py Proyecto: seanxu1015/vae

def make_prior():
    """Return a Normal prior with zero mean and unit scale.

    Both parameter lists have ``embedding_size`` entries (a module-level
    name defined elsewhere in the file).
    """
    zero_locs = [0. for _ in range(embedding_size)]
    unit_scales = [1. for _ in range(embedding_size)]
    return tfd.Normal(loc=zero_locs, scale=unit_scales)

Ejemplo n.º 29

0

Mostrar archivo

    def calc_KLdiv(self, z_prior, z_post):
        """Compute the weighted KL term between posterior and prior latents.

        The posterior parameters are tiled along a new axis 1 so that they
        line up with the ``self.num_classes_kn`` hypotheses of the prior.

        Args:
            z_prior: dict holding the prior parameters; 'mu' is always read,
                'sigma' only when ``self.z_dist == 'N'``.
            z_post: dict holding the posterior parameters; 'mu' is always
                read, 'sigma' when ``self.z_dist == 'N'`` and 'log_sample'
                for the relaxed-Bernoulli variants (``z_B_kl`` in
                20, 212, 21).

        Returns:
            The per-element KL (annotated ``[B, hyp, z]`` in the code below)
            summed over axis 2 and scaled by ``self.z_kl_weight``. When
            ``self.uk_cycling`` is set, entries selected by
            ``self.current_cycl_uk_mask`` are replaced by the row-wise
            maximum.

        Raises:
            ValueError: if ``self.z_dist`` or ``self.z_B_kl`` is unknown.
        """
        # `~` is bitwise: on a Python bool it yields -2 / -1, which are both
        # truthy, so the flag could never be switched off. Use logical `not`.
        allow_nan_stats = not self.debug

        def tile_over_hyp(tensor):
            """Insert a hypothesis axis at position 1 and repeat it."""
            return tf.tile(tensor[:, tf.newaxis, :],
                           [1, self.num_classes_kn, 1])

        post_mu = tile_over_hyp(z_post['mu'])

        if self.z_dist == 'N':
            post_sigma = tile_over_hyp(z_post['sigma'])

            dist_prior = tfd.Normal(loc=z_prior['mu'],
                                    scale=z_prior['sigma'],
                                    allow_nan_stats=allow_nan_stats)
            dist_post = tfd.Normal(loc=post_mu,
                                   scale=post_sigma,
                                   allow_nan_stats=allow_nan_stats)
            KLdiv = dist_post.kl_divergence(dist_prior)  # [B, hyp, z]
        elif self.z_dist == 'B':
            if self.z_B_kl in [20, 212]:
                # Monte Carlo approximation on the logistic node (a true
                # lower bound but can exhibit higher variance)
                post_log_sample = tile_over_hyp(z_post['log_sample'])
                dist_prior = pseudo_LogRelaxedBernoulli(
                    logits=z_prior['mu'],
                    temperature=self.m['VAEEncoder'].temp_prior,
                    allow_nan_stats=allow_nan_stats)
                dist_post = pseudo_LogRelaxedBernoulli(
                    logits=post_mu,
                    temperature=self.m['VAEEncoder'].temp_post,
                    allow_nan_stats=allow_nan_stats)
                KLdiv = dist_post.log_prob(
                    post_log_sample) - dist_prior.log_prob(
                        post_log_sample)  # [B, hyp, z]
                if self.z_B_kl == 212:
                    # slightly different relaxation from equation 21, but
                    # seemed to learn quite well
                    KLdiv *= dist_post.prob(post_log_sample)
            elif self.z_B_kl == 21:
                # relax computation of the discrete log mass: not a true
                # lower bound, be aware of overfitting on spurious elements
                # in this 'KL'
                def pseudo_kl(a_logits, b_logits, z_logits):
                    """Bernoulli-kl with 'external' labels given by z_logits"""
                    delta_probs0 = tf.nn.softplus(-b_logits) - tf.nn.softplus(
                        -a_logits)
                    delta_probs1 = tf.nn.softplus(b_logits) - tf.nn.softplus(
                        a_logits)
                    return (tf.nn.sigmoid(z_logits) * delta_probs0 +
                            tf.nn.sigmoid(-z_logits) * delta_probs1)

                post_log_sample = tile_over_hyp(z_post['log_sample'])
                KLdiv = pseudo_kl(post_mu,
                                  z_prior['mu'],
                                  z_logits=post_log_sample)
            elif self.z_B_kl == 22:
                # replace discrete mass with the analytic discrete KL: not a
                # true lower bound, be aware of overfitting on spurious
                # elements in this 'KL'
                dist_prior = tfd.Bernoulli(logits=z_prior['mu'],
                                           allow_nan_stats=allow_nan_stats)
                dist_post = tfd.Bernoulli(logits=post_mu,
                                          allow_nan_stats=allow_nan_stats)
                KLdiv = dist_post.kl_divergence(dist_prior)  # [B, hyp, z]
            else:
                raise ValueError('Unknown z_B_kl: {}'.format(self.z_B_kl))
        else:
            raise ValueError('Unknown z_dist: {}'.format(self.z_dist))

        # Collapse the latent axis and apply the KL weight.
        KLdiv = self.z_kl_weight * tf.reduce_sum(KLdiv, axis=2)

        if self.uk_cycling:
            # mask the prediction error of the current uk classes with the
            # highest prediction error of the observation
            KLdiv = tf.where(
                self.current_cycl_uk_mask,
                tf.tile(tf.reduce_max(KLdiv, axis=1, keep_dims=True),
                        [1, self.num_classes_kn]), KLdiv)

        return KLdiv

Ejemplo n.º 30

0

Mostrar archivo

Archivo: activeClassifier.py Proyecto: dHonerkamp/ActiveClassifier

    def __init__(self, FLAGS, env, phase):
        """Build the TensorFlow ops of the active classifier.

        Creates the sub-modules and the planner, unrolls the glimpse loop for
        ``FLAGS.num_glimpses`` steps while recording per-step tensors in
        TensorArrays, then defines the losses, the train op, the summaries
        and the saver.

        Args:
            FLAGS: configuration object; attributes read here include
                ``num_classes``, ``planner``, ``size_rnn``, ``size_z`` and
                ``num_glimpses``.
            env: environment object; ``patch_shape_flat``, ``step()``,
                ``y_MC`` and ``composed_glimpse()`` are used here.
            phase: mapping from which ``'random_locations'`` is read; it is
                also forwarded to the parent constructor and to
                ``self._create_saver``.

        Raises:
            ValueError: if ``FLAGS.planner`` is neither 'ActInf' nor 'RL'.
        """
        super().__init__(FLAGS, env, phase)
        min_glimpses = 3
        random_locations = phase['random_locations']  # tf.logical_and(self.epoch_num < FLAGS.pre_train_epochs, self.is_training)

        # Initialise modules
        n_policies = FLAGS.num_classes if FLAGS.planner == 'ActInf' else 1
        policyNet = PolicyNetwork(FLAGS, self.B, n_policies)
        glimpseEncoder = GlimpseEncoder(FLAGS)
        VAEencoder   = Encoder(FLAGS, env.patch_shape_flat)
        VAEdecoder   = Decoder(FLAGS, env.patch_shape_flat)
        stateTransition_AC = StateTransition_AC(FLAGS.size_rnn, 2*FLAGS.size_z)
        fc_baseline = tf.layers.Dense(1, name='fc_baseline')

        submodules = {'policyNet': policyNet,
                      'VAEencoder': VAEencoder,
                      'VAEdecoder': VAEdecoder}
        if FLAGS.planner == 'ActInf':
            planner = ActInfPlanner(FLAGS, submodules, self.B, env.patch_shape_flat, self.C, stateTransition_AC)
        elif FLAGS.planner == 'RL':
            planner = REINFORCEPlanner(FLAGS, submodules, self.B, env.patch_shape_flat)
        else:
            raise ValueError('Undefined planner.')

        self.n_policies = planner.n_policies

        # variables to remember. Probably to be implemented via TensorArray
        out_ta = []
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='obs'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='glimpse_nlls_posterior'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='glimpse_reconstr'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='zs_post'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='G'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='actions'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='actions_mean'))
        out_ta.append(tf.TensorArray(tf.int32,   size=min_glimpses, dynamic_size=True, name='decisions'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='rewards'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='baselines'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses+1, dynamic_size=True, name='current_cs'))
        out_ta.append(tf.TensorArray(tf.bool,    size=min_glimpses, dynamic_size=True, name='done'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='exp_exp_obs'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='exp_obs'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='H_exp_exp_obs'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='exp_H'))
        out_ta.append(tf.TensorArray(tf.float32, size=min_glimpses, dynamic_size=True, name='potential_actions'))

        # Index the TensorArrays by the last component of their handle name
        # with the ':0' suffix stripped.
        # NOTE(review): presumably this recovers the `name=` given above --
        # confirm that no name scope / uniquifying suffix changes the key.
        ta_d = {}
        for i, ta in enumerate(out_ta):
            ta_d[ta.handle.name.split('/')[-1].replace(':0', '')] = ta

        # Initial values
        last_done = tf.zeros([self.B], dtype=tf.bool)
        last_decision = tf.fill([self.B], -1)
        # in case starting calculation after initial observation (as first location should be identical for all images)
        next_action, next_action_mean = policyNet.inital_loc()
        next_decision = tf.fill([self.B], -1)
        current_state = stateTransition_AC.initial_state(self.B, next_action)

        ta_d['current_cs'] = write_zero_out(0, ta_d['current_cs'], current_state['c'], last_done)

        # out of loop to not create new tensors every step
        one_hot_label = tf.one_hot(tf.range(FLAGS.num_classes), depth=FLAGS.num_classes)
        one_hot_label_repeated = repeat_axis(one_hot_label, 0, self.B)  # [B * hyp, hyp]

        def current_belief_update(current_state, new_observation, exp_obs_prior, time):
            """Given a new observation and the last beliefs over the state, update the beliefs over the states.
            The sufficient statistic of the old state in this case is z, as the VAEencoder is class-specific.

            Args:
                current_state: dict; the keys 's', 'l' and 'c' are read here.
                new_observation: the (encoded) observation of this step.
                exp_obs_prior: expected observation, scored against new_observation via calculate_gaussian_nll.
                time: Python step index; 0 selects the un-averaged belief.

            Returns:
                c: [B, num_classes] belief over classes based on past observations
                zs_post: [B, num_classes, 2*size_z] concatenated posterior mu and sigma per class
                zs_post_samples: [B, num_classes, size_z] sampled zs conditional on each class
                nll_post: [B, num_classes] posterior['loss'] of the observation conditional on each class
                reconstr_post: [B, num_classes, glimpse] posterior['sample'] per class
                """
            with tf.name_scope('Belief_update'):
                # Infer posterior z for all hypotheses
                with tf.name_scope('poterior_inference_per_hyp'):
                    class_conditional_s = tf.reshape(current_state['s'], [self.B * FLAGS.num_classes, FLAGS.size_rnn])
                    new_action_repeated = repeat_axis(current_state['l'], 0, FLAGS.num_classes)
                    new_observation_repeated = repeat_axis(new_observation, 0, FLAGS.num_classes)

                    z_post = VAEencoder.posterior_inference(one_hot_label_repeated,
                                                            class_conditional_s,
                                                            tf.stop_gradient(new_action_repeated),
                                                            new_observation_repeated)
                    # 2 possibilties to infer state from received observations:
                    # i)  judge by likelihood of the observations under each hypothesis
                    # ii) train a separate model (e.g. LSTM) for infering states
                    # TODO: CAN WE DO THIS IN AN ENCODED SPACE?
                    posterior = VAEdecoder.decode(one_hot_label_repeated,
                                                  class_conditional_s,
                                                  z_post['sample'],
                                                  tf.stop_gradient(new_action_repeated),
                                                  new_observation_repeated)  # ^= filtering, given that transitions are deterministic

                    zs_post         = tf.reshape(tf.concat([z_post['mu'], z_post['sigma']], axis=1),
                                                 [self.B, FLAGS.num_classes, 2*FLAGS.size_z])
                    zs_post_samples = tf.reshape(z_post['sample'], [self.B, FLAGS.num_classes, FLAGS.size_z])
                    reconstr_post   = tf.reshape(posterior['sample'], [self.B, FLAGS.num_classes, env.patch_shape_flat])
                    nll_post        = tf.reshape(posterior['loss'], [self.B, FLAGS.num_classes])

                # believes over the classes based on all past observations (uniformly weighted)
                with tf.name_scope('belief_update'):
                    # TODO: THINK ABOUT THE SHAPE. PRIOR SHOULD BE FOR EACH HYP. USE new_observation_repeated?
                    prior_nll = calculate_gaussian_nll(exp_obs_prior, new_observation)

                    # Running average of the softmax over -prior_nll: the new
                    # term is weighted 1/time, the previous belief (time-1)/time.
                    if time == 0:
                        c = tf.nn.softmax(-prior_nll, axis=1)
                    else:
                        c = (1. / time) * tf.nn.softmax(-prior_nll, axis=1) + (time - 1.) / time * current_state['c']

                return (c,  # [B, num_classes]
                        zs_post,  # [B, num_classes, 2*z]
                        zs_post_samples,  # [B, num_classes, z]
                        nll_post,  # [B, num_classes]
                        reconstr_post)  # [B, num_classes, glimpse]


        with tf.name_scope('Main_loop'):
            for time in range(FLAGS.num_glimpses):
                # NOTE(review): the `if time == 0:` below has no body in this
                # listing, which is a syntax error as written. The missing
                # statements presumably initialise values used later in the
                # loop (e.g. `next_exp_obs`) -- restore from the original
                # source.
                if time == 0:


                # NOTE(review): this call to planner.planning_step takes
                # (current_state, z_samples, time, is_training) and unpacks 5
                # values, while the call further down passes an extra
                # `zs_post` and unpacks 4 -- confirm which signature is
                # current. `labels` is not assigned anywhere in this method.
                if time > 1:
                    if random_locations:
                        next_decision, next_action, next_action_mean, pl_records = planner.random_policy()
                    else:
                        next_decision, next_action, next_action_mean, next_exp_obs, pl_records = planner.planning_step(current_state, z_samples, time, self.is_training)

                    # TODO : Could REUSE FROM PLANNING STEP
                    current_state = stateTransition_AC([last_z, labels, next_action], last_state)

                observation, corr_classification_fb, done = env.step(next_action, next_decision)
                # Once an example is done it stays done.
                done = tf.logical_or(last_done, done)
                obs_enc = glimpseEncoder.encode(observation)


                # NOTE(review): `next_exp_obs` is only assigned in the
                # non-random `time > 1` branch above, so it is unbound here on
                # earlier steps -- verify against the original source.
                current_state['c'], zs_post, z_samples, nll_posterior, reconstr_posterior = current_belief_update(current_state, obs_enc, next_exp_obs, time)
                # baseline = fc_baseline(tf.stop_gradient(tf.concat([current_c, tf.fill([self.B, 1], tf.cast(time, tf.float32))], axis=1)))
                baseline = tf.squeeze(fc_baseline(tf.stop_gradient(current_state['c'])), 1)

                # t=0 to T-1. ACTION RECORDING HAS TO STAY BEFORE PLANNING OR WILL BE OVERWRITTEN
                ta_d['obs']                      = write_zero_out(time, ta_d['obs'], observation, done)
                ta_d['zs_post']                  = write_zero_out(time, ta_d['zs_post'], zs_post, done)  # [B, n_policies, size_z]
                ta_d['glimpse_nlls_posterior']   = write_zero_out(time, ta_d['glimpse_nlls_posterior'], nll_posterior, done)  # [B, n_policies]
                ta_d['glimpse_reconstr']         = write_zero_out(time, ta_d['glimpse_reconstr'], reconstr_posterior, done)  # for visualisation only
                ta_d['actions']                  = write_zero_out(time, ta_d['actions'], next_action, done)  # location actions, not including the decision acions
                ta_d['actions_mean']             = write_zero_out(time, ta_d['actions_mean'], next_action_mean, done)  # location actions, not including the decision acions
                ta_d['baselines']                = write_zero_out(time, ta_d['baselines'], baseline, done)
                ta_d['done']                     = ta_d['done'].write(time, done)
                # t=0 to T
                ta_d['rewards']                  = write_zero_out(time, ta_d['rewards'] , corr_classification_fb, last_done)

                if random_locations:
                    next_decision, next_action, next_action_mean, pl_records = planner.random_policy()
                else:
                    next_decision, next_action, next_action_mean, pl_records = planner.planning_step(current_state, zs_post, z_samples, time, self.is_training)

                # t=1 to T
                for k, v in pl_records.items():
                    ta_d[k] = write_zero_out(time, ta_d[k], v, last_done)
                ta_d['current_cs'] = write_zero_out(time+1, ta_d['current_cs'], current_state['c'], last_done)  # ONLY ONE t=0 TO T
                ta_d['decisions']  = write_zero_out(time, ta_d['decisions'], next_decision, last_done)
                # copy forward
                classification_decision = tf.where(last_done, last_decision, next_decision)
                # pass on to next time step
                last_done = done
                last_decision = next_decision
                last_z = zs_post  # TODO: or should this be the sampled ones?
                # last_c = current_c  # TODO: could also use the one from planning (new_c) or pi
                # last_s = current_s

                last_state = current_state

                # TODO: break loop if tf.reduce_all(last_done) (requires tf.while loop)
                # Rebinding the loop variable does not change the iteration
                # of `for time in range(...)`; it is reassigned on the next step.
                time += 1

        with tf.name_scope('Stacking'):
            self.obs = ta_d['obs'].stack()  # [T,B,glimpse]
            self.actions = ta_d['actions'].stack()  # [T,B,2]
            actions_mean = ta_d['actions_mean'].stack()  # [T,B,2]
            self.decisions = ta_d['decisions'].stack()
            rewards = ta_d['rewards'].stack()
            done = ta_d['done'].stack()
            self.glimpse_nlls_posterior = ta_d['glimpse_nlls_posterior'].stack()  # [T,B,hyp]
            zs_post = ta_d['zs_post'].stack()  # [T,B,hyp,2*z]
            self.state_believes = ta_d['current_cs'].stack()  # [T+1,B,hyp]
            self.G = ta_d['G'].stack()  # not zero'd-out so far!
            bl_loc = ta_d['baselines'].stack()
            self.glimpse_reconstr = ta_d['glimpse_reconstr'].stack()  # [T,B,hyp,glimpse]

            # further records for debugging
            self.exp_exp_obs = ta_d['exp_exp_obs'].stack()
            self.exp_obs = ta_d['exp_obs'].stack()
            self.H_exp_exp_obs = ta_d['H_exp_exp_obs'].stack()
            self.exp_H = ta_d['exp_H'].stack()
            self.potential_actions = ta_d['potential_actions'].stack()  # [T,B,n_policies,loc]

            self.num_glimpses_dyn = tf.shape(self.obs)[0]
            # Per-example step count: total glimpses minus the number of
            # recorded steps flagged as done.
            T = FLAGS.num_glimpses - tf.count_nonzero(done, 0, dtype=tf.float32)
            self.avg_T = tf.reduce_mean(T)

        with tf.name_scope('Losses'):
            with tf.name_scope('RL'):
                # Reverse cumulative sum over the time axis.
                returns = tf.cumsum(rewards, reverse=True, axis=0)
                policy_losses = policyNet.REINFORCE_losses(returns, bl_loc, self.actions, actions_mean)  # [T,B]
                policy_loss   = tf.reduce_sum(tf.reduce_mean(policy_losses, 1))

                baseline_mse = tf.reduce_mean(tf.square(tf.stop_gradient(returns[1:]) - bl_loc[:-1]))

            with tf.name_scope('Classification'):
                # might never make a classification decision
                # TODO: SHOULD I FORCE THE ACTION AT t=t TO BE A CLASSIFICATION?
                self.classification = classification_decision

            with tf.name_scope('VAE'):
                # mask losses of wrong hyptheses
                nll_posterior = tf.reduce_sum(self.glimpse_nlls_posterior, 0)  # sum over time
                correct_hypoths = tf.cast(tf.one_hot(env.y_MC, depth=FLAGS.num_classes), tf.bool)
                nll_posterior = tf.where(correct_hypoths, nll_posterior, tf.zeros_like(nll_posterior))  # zero-out all but true hypothesis
                nll_posterior = tf.reduce_mean(nll_posterior)  # mean over batch

                # assume N(0,1) prior model (event though atm prior never used)
                prior_mu = tf.fill([self.B, FLAGS.size_z], 0.)
                prior_sigma = tf.fill([self.B, FLAGS.size_z], 1.)

                zs_post_correct = tf.boolean_mask(zs_post, correct_hypoths, axis=1)
                # zs_post stores mu and sigma concatenated; split them again.
                post_mu, post_sigma = tf.split(zs_post_correct, 2, axis=2)
                # KL_div = T * VAEencoder.kl_div_normal(post_mu, post_sigma, prior_mu, prior_sigma)  # NOTE: "T *" is wrong as T is [self.B]. Incorporat before reducing to a scalar
                N_post = tfd.Normal(loc=post_mu, scale=post_sigma)
                N_prior = tfd.Normal(loc=prior_mu, scale=prior_sigma)
                KL_div = N_post.kl_divergence(N_prior)
                KL_div = tf.where(tf.tile(done[:, :, tf.newaxis], [1, 1, FLAGS.size_z]), tf.zeros_like(KL_div), KL_div)  # replace those that are done
                KL_div = tf.reduce_mean(tf.reduce_sum(KL_div, 0))

            # TODO: SCALE LOSSES DIFFERENTLY? (only necessary if they flow into the same weights, might not be the case so far)
            self.loss = policy_loss + baseline_mse + nll_posterior + KL_div


        with tf.variable_scope('Optimizer'):
            # With random locations only the VAE encoder/decoder variables are
            # trained, on the VAE losses alone; otherwise the full loss is used.
            if random_locations:
                pretrain_vars = VAEencoder.trainable + VAEdecoder.trainable
                self.train_op, gradient_check_Pre, _ = self._create_train_op(FLAGS, nll_posterior + KL_div, self.global_step, varlist=pretrain_vars)
            else:
                self.train_op, gradient_check_F, _ = self._create_train_op(FLAGS, self.loss, self.global_step)

        with tf.name_scope('Summaries'):
            metrics_upd_coll = "streaming_updates"

            scalars = {'loss/loss': self.loss,
                       'loss/accuracy': tf.reduce_mean(tf.cast(tf.equal(classification_decision, self.y_MC), tf.float32)),
                       'loss/VAE_nll_posterior': nll_posterior,
                       'loss/VAE_KL_div': KL_div,
                       'loss/RL_loc_baseline_mse': tf.reduce_mean(baseline_mse),
                       'loss/RL_policy_loss': policy_loss,
                       'loss/RL_returns': tf.reduce_mean(returns),
                       'misc/T': self.avg_T,
                       'misc/share_no_decision': tf.count_nonzero(tf.equal(classification_decision, -1), dtype=tf.float32) / tf.cast(self.B, tf.float32)}

            # Register every scalar both as a summary and as a streaming mean
            # whose update op is collected under metrics_upd_coll.
            for name, scalar in scalars.items():
                tf.summary.scalar(name, scalar)
                tf.metrics.mean(scalar, name=name, updates_collections=metrics_upd_coll)

            self.metrics_update = tf.get_collection(metrics_upd_coll)
            self.metrics_names = [v.name.replace('_1/update_op:0', '').replace('Summaries/', '') for v in self.metrics_update]

            self.summary = tf.summary.merge_all()

            self.glimpses_composed = env.composed_glimpse(FLAGS, self.obs, self.num_glimpses_dyn)

        self.acc = tf.reduce_mean(tf.cast(tf.equal(classification_decision, self.y_MC), tf.float32))  # only to get easy direct intermendiate outputs

        self.saver = self._create_saver(phase)