Ejemplo n.º 1
0
def get_kl_divergence(shape, mu, sigma, prior, sample):
    """
    Single-sample estimate of KL(posterior || prior) for one weight tensor.

    The returned value is log q(w | mu, sigma) - log p(w), evaluated at the
    given ``sample``. The prior term is built from two zero-mean Gaussians
    with scales ``prior.sigma1`` and ``prior.sigma2`` and mixing weight
    ``prior.pi_mix``: each component's log-density is summed over all
    weights, the log mixing weight is added, and the two totals are combined
    with a log-sum-exp.

    shape = shape of the sampled tensor (not used by this function)
    mu = location parameter of the Gaussian posterior
    sigma = scale parameter of the Gaussian posterior
    prior = object exposing ``sigma1``, ``sigma2`` and ``pi_mix``
    sample = the sample drawn from the posterior (flattened before use)
    """
    flat_weights = tf.reshape(sample, [-1])

    # Variational posterior q(w | mu, sigma) and the two prior components.
    q_dist = Normal(mu, sigma)
    wide_component = Normal(0.0, prior.sigma1)
    narrow_component = Normal(0.0, prior.sigma2)

    # sum(log q(w | mu, sigma)) over every weight.
    log_q = tf.reduce_sum(q_dist.log_prob(flat_weights))

    # Per-component summed log-density plus the log mixing weight.
    weighted_components = [
        tf.reduce_sum(wide_component.log_prob(flat_weights))
        + tf.log(prior.pi_mix),
        tf.reduce_sum(narrow_component.log_prob(flat_weights))
        + tf.log(1.0 - prior.pi_mix),
    ]
    log_p = tf.reduce_logsumexp(weighted_components)

    return log_q - log_p
Ejemplo n.º 2
0
    def make_distribs(self, xxx_todo_changeme):
        """Turn the parameter tuple returned by `_build` into distributions.

        :param xxx_todo_changeme: 5-tuple of (where loc, where scale,
            what loc, what scale, presence logits).
        :return: tuple ``(what_prior, where_prior, prop_prior)``.
        """
        where_loc, where_scale, what_loc, what_scale, presence_logit = xxx_todo_changeme

        what_prior = Normal(what_loc, what_scale)
        where_prior = Normal(where_loc, where_scale)
        # The logits carry a trailing axis that is squeezed away before use.
        squeezed_logit = tf.squeeze(presence_logit, -1)
        prop_prior = Bernoulli(logits=squeezed_logit)

        return what_prior, where_prior, prop_prior
def get_KL_divergence_Sample(shape, mu, sigma, prior, Z):
    """
    Sample-based estimate of the KL divergence between posterior and prior.

    Rather than computing the exact KL divergence between the variational
    posterior and the prior over the weights, this evaluates both
    log-densities at the specific sampled weights ``Z``:

        KL_sample = log q(W | theta) - log p(W | theta_0)

    In this case:
        - Posterior: independent Gaussian per weight, Normal(mu, sigma).
        - Prior: two zero-mean Gaussians with scales ``prior.sigma1`` and
          ``prior.sigma2``, mixed with weight ``prior.pi_mix``.

    Input:
        - shape: shape of the sampled tensor (not used by this function).
        - mu, sigma: parameters of the variational posterior.
        - prior: the prior object holding the mixture parameters.
        - Z: sampled weight values (the hidden variables); flattened
          before use.

    Returns the scalar tensor ``q_ll - p_ll``.
    """
    flat_z = tf.reshape(Z, [-1])

    # Variational posterior, then the two Gaussian components of the prior.
    variational = Normal(mu, sigma)
    component_1 = Normal(0.0, prior.sigma1)
    component_2 = Normal(0.0, prior.sigma2)

    # Summed log-likelihood of the sampled weights under the posterior.
    q_ll = tf.reduce_sum(variational.log_prob(flat_z))

    # Summed log-likelihood under each prior component, with the log mixing
    # weight added, combined through log-sum-exp.
    log_pi = tf.log(prior.pi_mix)
    weighted_1 = tf.reduce_sum(component_1.log_prob(flat_z)) + log_pi
    log_one_minus_pi = tf.log(1.0 - prior.pi_mix)
    weighted_2 = tf.reduce_sum(component_2.log_prob(flat_z)) + log_one_minus_pi
    p_ll = tf.reduce_logsumexp([weighted_1, weighted_2])

    return q_ll - p_ll
Ejemplo n.º 4
0
def gaussian_Renyi_cdf_decoder(hidden_decoder, x_samples):
    """Build a Gaussian decoder head plus the Renyi and CDF terms derived from it.

    Creates the weight/bias variables for the mean and log-variance heads
    (via ``weight_variable`` / ``bias_variable``), computes the decoder mean
    and standard deviation from ``hidden_decoder``, and then derives a set of
    intermediate quantities evaluated at ``x_samples``.

    Relies on names that are not defined in this function and must exist at
    module level: ``hidden_decoder_dim``, ``input_dim``, ``tf_type``,
    ``alpha``, ``eps_int_sup``, ``eps_int_inf`` and ``mydawson``.

    NOTE(review): ``np.sqrt((alpha-1)/((2-alpha)*2))`` is only real-valued
    for 1 < alpha < 2 -- confirm that is the intended range of ``alpha``.

    Args:
        hidden_decoder: Last hidden activation of the decoder; it is
            matrix-multiplied with [hidden_decoder_dim, input_dim] weights.
        x_samples: Observations at which the densities/CDFs are evaluated.

    Returns:
        An 18-tuple: (std_decoder, mu_decoder, bt, at, b, a, dawson_sup,
        h_z1, elem1, elem2, log_Id, term1, term1_2, term2, rez, log_pxz,
        pxz, log_cdf2_pxz).
    """
    # Parameters of the mean head.
    W_decoder_hidden_reconstr_mu = weight_variable([hidden_decoder_dim, input_dim],"W_decoder_hidden_reconstr_mu")
    b_decoder_hidden_reconstr_mu = bias_variable([input_dim],"b_decoder_hidden_reconstr_mu")

    # Parameters of the log-variance head.
    W_decoder_hidden_reconstr_logvar = weight_variable([hidden_decoder_dim, input_dim],"W_decoder_hidden_reconstr_logvar")
    b_decoder_hidden_reconstr_logvar = bias_variable([input_dim],"b_decoder_hidden_reconstr_logvar")

    # NOTE(review): `offset` is created but never read in this function.
    offset = tf.constant(1.0, shape=b_decoder_hidden_reconstr_mu.get_shape(), dtype=tf_type)


    # Mean is squashed into (0, 1) by a sigmoid; param_mul is a fixed 1.0 multiplier.
    param_mul = tf.constant(1.0, dtype=tf_type)
    mu_decoder = tf.sigmoid(tf.multiply(param_mul, tf.matmul(hidden_decoder, W_decoder_hidden_reconstr_mu) + b_decoder_hidden_reconstr_mu))

    logvar_decoder = tf.matmul(hidden_decoder, W_decoder_hidden_reconstr_logvar) + b_decoder_hidden_reconstr_logvar


    # std = exp(logvar / 2); the 1e-5 keeps it strictly positive.
    std_decoder = tf.exp(0.5 * logvar_decoder) + 1e-5

    # Scale factor applied to the standardised interval bounds.
    term1 = tf.divide(np.sqrt((alpha-1)/((2-alpha)*2)),std_decoder)
    # Upper (b) and lower (a) bounds of the interval around each observation.
    b = x_samples + eps_int_sup #eps_int = 0.5e-1
    a = x_samples - eps_int_inf
    bt = tf.multiply(term1, b - mu_decoder)
    at = tf.multiply(term1, a - mu_decoder)

    # Returned for inspection; term2 below evaluates mydawson(bt) again.
    dawson_sup = mydawson(bt)

    # mydawson(bt) * exp(bt^2 - at^2) - mydawson(at); 1e-8 guards the log below.
    term2 = tf.multiply(mydawson(bt), tf.exp(tf.pow(bt, 2)-tf.pow(at, 2))) - mydawson(at) + 1e-8

    rez = term2
    log_term2_2 = tf.pow(at,2) + tf.log(rez)

    # Reciprocal of term1.
    term1_2 = tf.divide(std_decoder,np.sqrt((alpha - 1) / ((2 - alpha) * 2)))
    log_Id = tf.log(term1_2) + log_term2_2

    # h_z1 = log(sqrt(2*pi) * std) + log_Id * (2 - alpha) / (alpha - 1)
    elem1 = tf.log(np.sqrt(2*np.pi)*std_decoder)
    elem2 = log_Id *( (2-alpha)/(alpha-1) )

    h_z1 = elem1 + elem2

    ###############################################################################################
    # These lines are for the importance sampling estimate of log_int_px
    log_pxz = Normal(mu_decoder, std_decoder).log_prob(x_samples)
    pxz = Normal(mu_decoder, std_decoder).prob(x_samples)

    # Gaussian probability mass of the interval [x - eps_int_inf, x + eps_int_sup].
    cdf_poz = Normal(mu_decoder, std_decoder).cdf(x_samples + eps_int_sup)#0.5e-1)  # 2
    cdf_neg = Normal(mu_decoder, std_decoder).cdf(x_samples - eps_int_inf)#0.5e-1)  # 2

    # 1e-8 avoids log(0) when the interval mass underflows.
    log_cdf_components = tf.log(cdf_poz - cdf_neg + 1e-8)
    log_cdf2_pxz = log_cdf_components
    #END These lines are for the importance sampling estimate of log_int_px
    ###############################################################################################
    return std_decoder, mu_decoder, bt, at, b, a,dawson_sup, h_z1, elem1, elem2, log_Id, term1, term1_2, term2, rez,log_pxz, pxz, log_cdf2_pxz
Ejemplo n.º 5
0
def KL_scale_mixture(shape, mu, sigma, prior, w):
    """Single-sample KL estimate against a scale-mixture Gaussian prior.

    Returns ``sum(log q(w | mu, sigma)) - log p(w)`` evaluated at the sampled
    weights ``w``. The prior combines two zero-mean Gaussians with scales
    ``prior.sigma_1`` and ``prior.sigma_2``, weighted by ``prior.pi`` and
    ``1 - prior.pi`` respectively.

    Args:
        shape: Shape of the weight tensor, (n_unit, n_w). Not used here.
        mu: Flattened location parameters of the Gaussian posterior.
        sigma: Flattened scale parameters of the Gaussian posterior.
        prior: Object exposing ``sigma_1``, ``sigma_2`` and ``pi``.
        w: Weights sampled from the posterior.

    Returns:
        Scalar tensor with the KL estimate.
    """
    posterior = Normal(mu, sigma)
    # mu and sigma are flat vectors, so w is flattened to line up with them.
    part_post = posterior.log_prob(tf.reshape(w, [-1]))
    prior_1 = Normal(0., prior.sigma_1)
    prior_2 = Normal(0., prior.sigma_2)
    part_1 = tf.reduce_sum(prior_1.log_prob(w)) + tf.log(prior.pi)
    # The second component carries the complementary mixing weight (1 - pi);
    # weighting both components by pi would leave the mixture unnormalised.
    part_2 = tf.reduce_sum(prior_2.log_prob(w)) + tf.log(1.0 - prior.pi)
    prior_mix = tf.stack([part_1, part_2])
    # log-sum-exp over the two weighted components yields the scalar log p(w).
    log_prior = tf.reduce_logsumexp(prior_mix, axis=0)
    KL = tf.reduce_sum(part_post) - log_prior
    return KL
Ejemplo n.º 6
0
def gauss_sample(gauss_params, quant_chann, use_log_scales=True):
    """Draw one sample from the predicted Gaussian and quantize it.

    The mean and standard deviation are decoded from ``gauss_params`` by
    ``mean_std_from_out_params``. The draw is clipped to
    ``[-1, 1 - 2 / quant_chann]`` before being passed to
    ``utils.cast_quantize``.
    """
    loc, scale = mean_std_from_out_params(gauss_params, use_log_scales)
    draw = Normal(loc=loc, scale=scale).sample()
    upper_bound = 1. - 2. / quant_chann
    clipped = tf.clip_by_value(draw, -1., upper_bound)
    return utils.cast_quantize(clipped, quant_chann)
Ejemplo n.º 7
0
def get_gaussian_mixture_log_prob(cat_probs, gauss_mu, gauss_sigma):
  """Return the log-density function of a Gaussian mixture model.

  Args:
    cat_probs:
      `1-D` tensor of categorical (mixing) probabilities; expected to sum
      to one.

    gauss_mu:
      List of tensors, one per mixture component, holding the `mu` values
      of the Gaussian components. All tensors are expected to share the
      shape of `gauss_mu[0]`.

    gauss_sigma:
      List of tensors, one per mixture component, holding the `sigma`
      values of the Gaussian components. Expected to be positive and of
      the same shape as `gauss_mu[0]`.

  Returns:
    The bound `log_prob` method of the constructed `Mixture` distribution.
  """
  n_components = cat_probs.shape[0]
  mixing = Categorical(probs=cat_probs)

  gaussians = []
  for idx in range(n_components):
    gaussians.append(Independent(Normal(gauss_mu[idx], gauss_sigma[idx])))

  return Mixture(cat=mixing, components=gaussians).log_prob
Ejemplo n.º 8
0
    def __init__(self, policy, rate, train=True):
        """Wire a Gaussian policy into a log-probability loss and optimizer.

        :param policy: object providing ``setup()`` and, after setup, the
            tensors ``X``, ``a``, ``a_pred`` and ``var``.
        :param rate: learning rate handed to the RMSProp optimizer.
        :param train: when true, also build the gradient and update ops.

        NOTE(review): ``policy.var`` is passed as the *scale* argument of
        ``Normal`` -- confirm it holds a standard deviation, not a variance.
        """
        self.rate = rate
        self.policy = policy

        with tf.variable_scope('policy_estimator'):
            policy.setup()

            # Re-export the policy's input/action tensors on the estimator.
            self.X = policy.X
            self.a = policy.a
            self.target = tf.placeholder(
                dtype='float', shape=[None, 1], name='target')

            self.a_pred = policy.a_pred
            self.var = policy.var

            # Log-probability of the taken actions under the predicted Gaussian,
            # weighted by the supplied target and summed into a scalar loss.
            action_dist = Normal(self.a_pred, self.var)
            self.log_probs = action_dist.log_prob(self.a)
            self.losses = self.log_probs * self.target
            self.loss = tf.reduce_sum(self.losses, name='loss')

            if not train:
                return

            self.opt = tf.train.RMSPropOptimizer(
                rate, decay=0.99, momentum=0.0, epsilon=1e-6)
            all_pairs = self.opt.compute_gradients(self.loss)
            # Variables that receive no gradient are dropped before applying.
            self.grads_and_vars = [
                (grad, var) for grad, var in all_pairs if grad is not None
            ]
            self.update = self.opt.apply_gradients(self.grads_and_vars)
Ejemplo n.º 9
0
    def loss(self):
        """Build the lower-bound op and register scalar summaries.

        Computes the batch-mean reconstruction log-likelihood and the
        batch-mean KL divergence between ``self.q_z_given_x`` and a standard
        normal, stores their difference in ``self.loss_op``, adds all three
        tensors to the ``'losses'`` collection, and emits one scalar summary
        per tensor in that collection.
        """
        # Standard-normal prior over the latent code.
        prior_loc = tf.constant(0.0, dtype=tf.float32)
        prior_scale = tf.constant(1.0, dtype=tf.float32)
        p_z = Normal(prior_loc, prior_scale)

        # Reconstruction term: sum over features, mean over the batch.
        per_example_ll = tf.reduce_sum(
            self.p_x_given_z.log_prob(self.x), axis=1)
        log_p_x_given_z = tf.reduce_mean(
            per_example_ll, name='reconstruction_error')
        tf.add_to_collection('losses', log_p_x_given_z)

        # Regularisation term: sum over latent dims, mean over the batch.
        per_example_kl = tf.reduce_sum(kl(self.q_z_given_x, p_z), axis=1)
        KL_qp = tf.reduce_mean(per_example_kl, name="kl_divergence")
        tf.add_to_collection('losses', KL_qp)

        # Lower bound = reconstruction log-likelihood minus KL.
        self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
        tf.add_to_collection('losses', self.loss_op)

        # One scalar summary for every tensor in the 'losses' collection.
        for tensor in tf.get_collection('losses'):
            tf.summary.scalar(tensor.op.name, tensor)
Ejemplo n.º 10
0
def build_factor(name, prior, shape=None):
    """Instantiate a Gaussian approximate-posterior factor for ``prior``.

    Creates ``loc`` and ``log_scale`` variables (initialised from freshly
    created placeholders) under the variable scope ``name``, draws one sample,
    maps it through the prior's support transform, and assembles this
    factor's contribution to the ELBO.

    Returns a ``Factor(sample, elbo_part, (loc, log_scale),
    (init_loc, init_log_scale))``.
    """
    if not shape:
        shape = prior.batch_shape
    dtype = gpflow.settings.float_type

    # Variational parameters; their initial values are fed via placeholders.
    with tf.variable_scope(name):
        init_loc = tf.placeholder(dtype, shape=shape)
        init_log_scale = tf.placeholder(dtype, shape=shape)
        loc = tf.get_variable('loc', initializer=init_loc)
        log_scale = tf.get_variable('log_scale', initializer=init_log_scale)
        scale = tf.exp(log_scale, name='scale')

    # Sample in unconstrained space, then push into the prior's support.
    transform = get_support_transform(prior)
    unconstrained = Normal(loc, scale).sample()
    sample = transform.forward(unconstrained)
    log_abs_det_jac = transform.forward_log_det_jacobian(
        unconstrained, transform.forward_min_event_ndims)

    # ELBO contribution: prior log-density + Jacobian term + Gaussian entropy,
    # where the entropy of N(loc, scale) is 0.5 * (1 + log(2*pi)) + log_scale.
    prior_logprob = prior.log_prob(sample)
    entropy_const = 0.5 * (1.0 + np.log(2 * np.pi))
    entropy = entropy_const + log_scale
    elbo_part = tf.reduce_sum(prior_logprob + log_abs_det_jac + entropy)

    return Factor(sample, elbo_part,
                  (loc, log_scale),
                  (init_loc, init_log_scale))
Ejemplo n.º 11
0
def build_ard_priors(model_kernel):
    """Create ARD priors for the projection matrices of a kernel tree.

    Walks the kernel tree depth-first. For every ``ProjKernel`` a
    Gamma(0.001, 0.001)-distributed precision factor is built and a
    zero-mean Normal prior on ``kernel.W`` is created with scale
    ``1 / sqrt(precision sample)``.

    Returns ``(priors, extra_factors)``: a dict mapping each ``kernel.W`` to
    its Normal prior, and a dict mapping each precision name to its factor.
    """
    dtype = gpflow.settings.float_type
    gamma_prior = Gamma(dtype(0.001), dtype(0.001))

    priors, extra_factors = {}, {}
    pending = [model_kernel]
    while pending:
        kernel = pending.pop()

        if isinstance(kernel, ProjKernel):
            # ARD-like prior, as in probabilistic PCA: one precision per
            # column of W.
            prec_name = kernel.W.pathname + '/precision'
            prec_factor = build_factor(
                prec_name, gamma_prior, (1, kernel.W.shape[1]))
            extra_factors[prec_name] = prec_factor

            scale_sample = 1. / tf.sqrt(prec_factor.sample)
            priors[kernel.W] = Normal(dtype(0), scale_sample)

            pending.append(kernel.base_kernel)
            continue

        if isinstance(kernel, gpflow.kernels.Combination):
            pending.extend(kernel.kernels)

    return priors, extra_factors
Ejemplo n.º 12
0
    def __init__(self, keep_prob, input_dim, output_dim, placeholders,
                 sparse_inputs=False, norm=True, **kwargs):
        """Set up a deterministic-dropout fully connected layer.

        Stores the configuration, creates a standard normal helper
        distribution and the layer variables (``weights`` and, when ``norm``
        is true, ``offset`` and ``scale``) under ``<name>_vars``.
        ``placeholders`` is accepted but not used in this constructor.
        """
        # TODO sparse inputs
        super(DetDropoutFC, self).__init__(**kwargs)
        self.sparse_inputs = sparse_inputs
        self.norm = norm
        self.keep_prob = keep_prob
        self.normal = Normal(0.0, 1.0)
        self.log_values = []

        scope_name = self.name + '_vars'
        with tf.variable_scope(scope_name):
            weight_shape = [input_dim, output_dim]
            self.vars['weights'] = glorot(weight_shape, name='weights')
            if norm:
                row_shape = [1, output_dim]
                self.vars['offset'] = zeros(row_shape, name='offset')
                self.vars['scale'] = ones(row_shape, name='scale')

        if self.logging:
            self._log_vars()
Ejemplo n.º 13
0
    def _build(self, transition, input_encoder, glimpse_encoder,
               glimpse_decoder, transform_estimator, steps_predictor, kwargs):
        """Build the model. See __init__ for argument description.

        Creates the AIRCell, unrolls it with ``tf.nn.dynamic_rnn`` over a
        dummy all-zeros input sequence, exposes every named cell output as an
        attribute on ``self``, and derives the canvas, output distribution and
        step-count statistics from those outputs.

        ``kwargs`` is a plain dict that is unpacked into the AIRCell
        constructor.
        """

        # Wrap the exploration epsilon in a non-trainable variable so it lives in the graph.
        if self.explore_eps is not None:
            self.explore_eps = tf.get_variable('explore_eps',
                                               initializer=self.explore_eps,
                                               trainable=False)

        self.cell = AIRCell(self.img_size,
                            self.glimpse_size,
                            self.n_appearance,
                            transition,
                            input_encoder,
                            glimpse_encoder,
                            glimpse_decoder,
                            transform_estimator,
                            steps_predictor,
                            canvas_init=None,
                            discrete_steps=self.discrete_steps,
                            explore_eps=self.explore_eps,
                            debug=self.debug,
                            **kwargs)

        initial_state = self.cell.initial_state(self.obs)

        # The RNN input is a zeros tensor of shape (max_steps, batch_size, 1);
        # it only fixes the number of unroll steps (time-major layout).
        dummy_sequence = tf.zeros((self.max_steps, self.batch_size, 1),
                                  name='dummy_sequence')
        outputs, state = tf.nn.dynamic_rnn(self.cell,
                                           dummy_sequence,
                                           initial_state=initial_state,
                                           time_major=True)

        # Expose each cell output under the name the cell declares for it.
        for name, output in zip(self.cell.output_names, outputs):
            setattr(self, name, output)

        # NOTE(review): takes the second-to-last element of the state tuple;
        # which component that is depends on AIRCell -- confirm.
        self.final_state = state[-2]
        # Glimpses are squashed with a sigmoid, gated by presence, and reshaped
        # to (max_steps, batch_size) + glimpse_size.
        self.glimpse = tf.reshape(self.presence * tf.nn.sigmoid(self.glimpse),
                                  (
                                      self.max_steps,
                                      self.batch_size,
                                  ) + tuple(self.glimpse_size))
        # Canvas is reshaped to (max_steps, batch_size) + img_size and rescaled.
        self.canvas = tf.reshape(self.canvas, (
            self.max_steps,
            self.batch_size,
        ) + tuple(self.img_size))
        self.canvas *= self.output_multiplier

        # Canvas after the last step.
        self.final_canvas = self.canvas[-1]

        # Gaussian over outputs, centred on the final canvas.
        self.output_distrib = Normal(self.final_canvas, self.output_std)

        posterior_step_probs = tf.transpose(tf.squeeze(self.presence_prob))
        self.num_steps_distrib = NumStepsDistribution(posterior_step_probs)

        # Steps taken per sample = presence summed over the time axis.
        self.num_step_per_sample = tf.to_float(
            tf.squeeze(tf.reduce_sum(self.presence, 0)))
        self.num_step = tf.reduce_mean(self.num_step_per_sample)
        # self.nums is not set in this method; presumably ground-truth presence
        # supplied elsewhere -- TODO confirm.
        self.gt_num_steps = tf.squeeze(tf.reduce_sum(self.nums, 0))
 def _build_anet(self, name, trainable):
     """Build the actor network under variable scope ``name``.

     A 200-unit ReLU layer on ``self.tfs`` feeds two ``A_DIM``-wide heads:
     a tanh head scaled by 2 for the mean and a softplus head for the scale.

     Returns the resulting Normal distribution and the global variables
     collected under ``name``.
     """
     with tf.variable_scope(name):
         hidden = tf.layers.dense(self.tfs, 200, tf.nn.relu,
                                  trainable=trainable)
         # tanh output lies in (-1, 1); doubling gives a mean in (-2, 2).
         mean_head = tf.layers.dense(hidden, A_DIM, tf.nn.tanh,
                                     trainable=trainable)
         mu = 2 * mean_head
         sigma = tf.layers.dense(hidden, A_DIM, tf.nn.softplus,
                                 trainable=trainable)
         norm_dist = Normal(loc=mu, scale=sigma)
     scope_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
     return norm_dist, scope_vars
Ejemplo n.º 15
0
def make_dists_and_sample(latent_sample_seq):
    """Split packed Gaussian parameters and draw one sample per distribution.

    The last axis of ``latent_sample_seq`` holds the means in its first half
    and the log standard deviations in its second half.

    Returns ``(latent_dists, sample)`` where ``sample`` is a single draw with
    all singleton axes squeezed away.
    """
    packed_width = latent_sample_seq.get_shape().as_list()[-1]
    latent_dim = int(packed_width / 2)

    means = latent_sample_seq[..., :latent_dim]
    log_stds = latent_sample_seq[..., latent_dim:]
    latent_dists = Normal(loc=means, scale=tf.exp(log_stds))

    # Request one draw; tf.squeeze then removes every size-1 axis.
    single_draw = latent_dists.sample([1])
    return latent_dists, tf.squeeze(single_draw)
Ejemplo n.º 16
0
 def _gauss_log_pi(self, mu, log_sig):
     """Sample a squashed Gaussian action and return its log-probability.

     Draws ``z`` from Normal(mu, exp(log_sig)), squashes it into an action,
     and subtracts the squash correction from the Gaussian log-density.

     Returns ``(log_pi[:, None], actions)``.
     """
     gaussian = Normal(mu, tf.exp(log_sig))
     pre_squash = gaussian.sample()
     actions = self._squash_actions(pre_squash)
     log_pi = (gaussian.log_prob(pre_squash)
               - self._squash_correction(pre_squash))
     return log_pi[:, None], actions
Ejemplo n.º 17
0
def norm(x, sigma):
    """
    Gaussian decay, normalised to peak at one.

    Evaluates the zero-mean Gaussian density with scale ``sigma`` at ``x``
    and divides it by the density at zero, so the result is 1.0 for x = 0
    and decays towards 0 as |x| grows beyond sigma.
    :param x: Tensor.
    :param sigma: Tensor.
    :return: Tensor.
    """
    gaussian = Normal(0.0, sigma)
    density_at_x = gaussian.pdf(x)
    peak_density = gaussian.pdf(0.0)
    return density_at_x / peak_density
Ejemplo n.º 18
0
 def scale_network(self, mean, variance, is_sampling):
     """Produce a (possibly noisy) scale in [0, 1] and its log-probability.

     The mean is clipped to [0, 1]. When ``is_sampling`` is true, Gaussian
     noise with stddev ``variance`` is added; the result is clipped again
     and detached from the gradient. The log-probability of the result
     under Normal(clipped mean, variance) is summed over the last axis.

     Returns ``(scale, mean, log_prob)``; ``mean`` is the unclipped input.
     """
     clipped_mean = tf.clip_by_value(mean, 0., 1.)

     def _with_noise():
         noise = tf.random_normal(tf.shape(clipped_mean), stddev=variance)
         return clipped_mean + noise

     def _without_noise():
         return clipped_mean

     drawn = tf.cond(is_sampling, _with_noise, _without_noise)
     drawn = tf.clip_by_value(drawn, 0., 1.)
     # The sampled value is treated as a constant by the optimizer.
     drawn = tf.stop_gradient(drawn)

     per_dim_log_prob = Normal(clipped_mean, variance).log_prob(drawn)
     log_prob = tf.reduce_sum(per_dim_log_prob, -1)
     return drawn, mean, log_prob
Ejemplo n.º 19
0
def normal_kl(m1,s1,m2,s2, sample):
    """
    Monte-Carlo KL term between two Normal distributions.

    Evaluates both log-densities at ``sample`` and returns
    ``-(log N(sample; m1, s1) - log N(sample; m2, s2))`` element-wise.
    """
    log_ratio = (Normal(m1, s1).log_prob(sample)
                 - Normal(m2, s2).log_prob(sample))
    return -log_ratio





    v = tf.where(u > u_prime, v_1, v_0)
    v = tf.check_numerics(v, 'v sampling is not numerically stable.')
    v = v + tf.stop_gradient(-v + u)  # v and u are the same up to numerical errors

    return v
Ejemplo n.º 20
0
 def location_network(self, mean, variance, is_sampling):
     """Produce a (possibly noisy) location in [-1, 1] and its log-probability.

     The mean is clipped to [-1, 1]. When ``is_sampling`` is true, Gaussian
     noise with stddev ``variance`` is added; the result is clipped again
     and detached from the gradient. The log-probability of the result
     under Normal(clipped mean, variance) is summed over the last axis.

     Returns ``(loc, mean, log_prob)``; ``mean`` is the unclipped input.
     """
     clipped_mean = tf.clip_by_value(mean, -1., 1.)

     def _with_noise():
         noise = tf.random_normal(tf.shape(clipped_mean), stddev=variance)
         return clipped_mean + noise

     def _without_noise():
         return clipped_mean

     drawn = tf.cond(is_sampling, _with_noise, _without_noise)
     drawn = tf.clip_by_value(drawn, -1., 1.)
     # The sampled value is treated as a constant by the optimizer.
     drawn = tf.stop_gradient(drawn)

     per_dim_log_prob = Normal(clipped_mean, variance).log_prob(drawn)
     log_prob = tf.reduce_sum(per_dim_log_prob, -1)
     return drawn, mean, log_prob
Ejemplo n.º 21
0
 def _sample(self, mu, std_dev):
     """
     Draw one sample from a Gaussian with the given parameters.

     :param mu: Gaussian mean.
     :param std_dev: Standard deviation of the Gaussian.
     :return: Sample z, with all singleton axes squeezed away.
     """
     gaussian = Normal(loc=mu, scale=std_dev)
     # Request a single draw per distribution, then drop size-1 axes.
     single_draw = gaussian.sample([1])
     return tf.squeeze(single_draw)
Ejemplo n.º 22
0
    def build_weights(self):
        """Create the variational parameters and a reparameterised sample.

        ``self.mean`` is initialised from N(0, 0.1) and ``self.log_std`` from
        N(-3, 0.1); ``self.sample`` is ``exp(log_std) * eps + mean`` with
        standard-normal ``eps``.

        Raises ``Exception`` when called on a prior distribution.
        """
        if self.is_prior:
            raise Exception('Prior distribution should not be sampled from')

        mean_init = tf.random_normal(shape=self.size, mean=0., stddev=0.1)
        self.mean = tf.Variable(mean_init)
        log_std_init = tf.random_normal(shape=self.size, mean=-3., stddev=0.1)
        self.log_std = tf.Variable(log_std_init)

        # Reparameterisation: scale unit-Gaussian noise, then shift by the mean.
        noise = Normal(0., 1.).sample(self.size)
        scaled_noise = tf.multiply(tf.exp(self.log_std), noise)
        self.sample = scaled_noise + self.mean
Ejemplo n.º 23
0
def mog_from_out_params(mog_params, use_log_scales):
    """Build a mixture-of-Gaussians distribution from packed network outputs.

    ``mog_params`` is split along axis 2 into three equal parts: mixture
    logits, means and std parameters. Each component's (mean, std) pair is
    decoded by ``mean_std_from_out_params``.

    Returns the resulting ``Mixture`` distribution.
    """
    logit_probs, means, std_params = tf.split(
        mog_params, num_or_size_splits=3, axis=2)
    mixing = Categorical(logits=logit_probs)

    n_components = mog_params.get_shape().as_list()[2] // 3

    def _component(idx):
        # Re-pack one component's mean/std parameters along axis 2.
        packed = tf.stack([means[:, :, idx], std_params[:, :, idx]], axis=2)
        loc, scale = mean_std_from_out_params(packed, use_log_scales)
        return Normal(loc=loc, scale=scale)

    gaussians = [_component(idx) for idx in range(n_components)]
    return Mixture(cat=mixing, components=gaussians)
Ejemplo n.º 24
0
 def __init__(self,
              n_params,
              loc_mult=1.,
              scale_offset=0.,
              *args,
              **kwargs):
     """Store the configuration and a factory for the Normal distribution.

     Extra positional and keyword arguments are forwarded to ``Normal``
     each time the factory is called.
     """
     super(ParametrisedGaussian, self).__init__()
     self._n_params = n_params
     self._loc_mult = loc_mult
     self._scale_offset = scale_offset

     def _make_normal(x, y):
         # Softplus keeps the scale positive; 1e-4 bounds it away from zero.
         positive_scale = tf.nn.softplus(y) + 1e-4
         return Normal(x, positive_scale, *args, **kwargs)

     self._create_distrib = _make_normal
Ejemplo n.º 25
0
    def _create_network(self):
        """Build the autoencoder graph and an extra prior-sample cross-entropy term.

        First part: encode ``self.x`` to (z_mean, z_log_sigma_sq), draw a
        reparameterised ``self.z`` and decode it to ``self.x_reconstr_mean``.

        Second part: draw ``self.z_theta`` from a standard normal, decode it
        to ``self.x_prime``, re-encode that, and store the mean negative
        log-density of ``z_theta`` under the re-encoded Gaussian in
        ``self.cross_entropy``.

        NOTE(review): uses legacy TensorFlow names (``tf.mul``,
        ``Normal(mu=, sigma=)``, ``log_pdf``, ``reduction_indices``), so this
        presumably targets a pre-1.0 TensorFlow -- confirm before upgrading.
        """
        # Initialize autoencode network weights and biases
        network_weights = self._initialize_weights(**self.network_architecture)

        # Use recognition network to determine mean and
        # (log) variance of Gaussian distribution in latent
        # space
        self.z_mean, self.z_log_sigma_sq = \
            self._recognition_network(network_weights["weights_recog"],
                                      network_weights["biases_recog"],
                                      self.x)

        # Draw one sample z from Gaussian distribution
        n_z = self.network_architecture["n_z"]
        eps = tf.random_normal((self.batch_size, n_z), 0, 1,
                               dtype=tf.float32)
        # z = mu + sigma*epsilon, with sigma = sqrt(exp(log sigma^2))
        self.z = tf.add(self.z_mean,
                        tf.mul(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps),
                        name='z')

        # Use generator to determine mean of
        # Bernoulli distribution of reconstructed input
        self.x_reconstr_mean = \
            self._generator_network(network_weights["weights_gener"],
                                    network_weights["biases_gener"],
                                    z=self.z)

        # ---- Second pass: sample from the prior, decode, then re-encode ----
        # Fresh standard-normal noise, independent of the eps used for self.z.
        eps = tf.random_normal((self.batch_size, n_z), 0, 1,
                               dtype=tf.float32)

        # 0.0 + 1.0 * eps, i.e. a standard-normal draw, kept as a named op.
        self.z_theta = tf.add(0.0, tf.mul(1.0, eps), name='z_theta')

        # Decode the prior sample with the same generator weights.
        self.x_prime = self._generator_network(network_weights["weights_gener"],
                                               network_weights["biases_gener"],
                                               z=self.z_theta)

        # Re-encode the generated input with the same recognition weights.
        self.z_prime_mean, self.z_prime_log_sigma_sq = self._recognition_network(
            network_weights["weights_recog"],
            network_weights["biases_recog"],
            self.x_prime)

        # Log-density of the original prior sample under the re-encoded
        # Gaussian, summed over the latent axis.
        dist = Normal(mu=self.z_prime_mean, sigma=tf.sqrt(tf.exp(self.z_prime_log_sigma_sq)))
        logli = tf.reduce_sum(dist.log_pdf(self.z_theta, name='x_entropy'), reduction_indices=1)

        # Batch mean of the negative log-likelihood.
        self.cross_entropy = tf.reduce_mean(- logli)
        #self.cross_entropy = tf.reduce_mean(- dist.log_pdf(self.z_theta, name='x_entropy'))
        # NOTE(review): hard-coded constant; its derivation is not visible here.
        self.entropy = tf.constant(28.37)
Ejemplo n.º 26
0
def get_noisy_weights(shape, name, prior, is_training, rho_min_init=None,
                      rho_max_init=None):
    """Get noisy (sampled) weights for a Bayes-by-Backprop layer.

    1. Create or reuse the ``<name>_mean`` and ``<name>_rho`` variables under
       the ``BBB`` variable scope.
    2. Sample the weights as ``mu + softplus(rho) * eps`` when training or
       when the module-level ``inference_mode`` equals ``'sample'``;
       otherwise use the mean.
    3. When training, add histogram summaries and append this layer's KL
       term to the ``'KL_layers'`` collection.
    4. Return the weight tensor.

    Args:
        shape: Shape of the weight tensor.
        name: Prefix for the variable and summary names.
        prior: Prior object; must provide ``lstm_init()`` and whatever
            ``KL_scale_mixture`` reads from it.
        is_training: Python bool. Variables are reused when false, and the
            summaries/KL term are only built when true.
        rho_min_init: Lower bound for the uniform initialisation of rho.
        rho_max_init: Upper bound for the uniform initialisation of rho.
            When either bound is None, both are taken from
            ``prior.lstm_init()``.

    Returns:
        The weight tensor ``w``.
    """
    # add mean
    with tf.variable_scope('BBB', reuse=not is_training):
        mu = tf.get_variable(name + '_mean', shape, dtype=tf.float32)

    # add rho
    if rho_min_init is None or rho_max_init is None:
        rho_min_init, rho_max_init = prior.lstm_init()
    rho_init = tf.random_uniform_initializer(rho_min_init, rho_max_init)
    with tf.variable_scope('BBB', reuse=not is_training):
        rho = tf.get_variable(name + '_rho', shape, dtype=tf.float32,
                              initializer=rho_init)

    # control output
    if is_training or inference_mode == 'sample':
        epsilon = Normal(0., 1.).sample(shape)
        # softplus keeps sigma positive; 1e-8 bounds it away from zero.
        sigma = tf.nn.softplus(rho) + 1e-8
        w = mu + sigma * epsilon
    else:
        w = mu

    # Summaries and the KL term belong to the training graph only. Outside
    # training, `sigma` may not even exist (deterministic inference), so
    # return before touching it.
    if not is_training:
        return w

    # create histogram
    tf.summary.histogram(name + '_mu_hist', mu)
    tf.summary.histogram(name + '_sigma_hist', sigma)
    tf.summary.histogram(name + '_rho_hist', rho)

    # KL
    kl = KL_scale_mixture(shape,
                          tf.reshape(mu, [-1]),
                          tf.reshape(sigma, [-1]),
                          prior,
                          w)
    tf.add_to_collection('KL_layers', kl)

    return w
Ejemplo n.º 27
0
    def _build_anet(self, name, trainable):
        """Build the conv + bidirectional-LSTM actor network under scope ``name``.

        ``self.tfs`` is reshaped into an image-like tensor, passed through one
        weight-normalised conv layer and a 2x2 max-pool, replicated six times
        to form the LSTM input sequence, and run through a static
        bidirectional LSTM. Two linear heads on every LSTM output give the
        per-step mean (tanh) and scale (softplus).

        Returns the Normal distribution over the flattened means/scales and
        the global variables collected under ``name``.
        """
        with tf.variable_scope(name):
            # prepare input: [batch, 3, node_dim, node_dim] -> channels-last
            s = tf.reshape(self.tfs, [-1, 3, FLAGS.node_dim, FLAGS.node_dim])
            s = tf.transpose(s,[0,2,3,1])
            h_conv1=conv2dWN(x=s, name='L1', num_filters=6,  trainable=trainable, nonlinearity=tf.nn.tanh, ema=None, shape=[3,3,3,6])
            h_conv1 = max_poo_2x2(h_conv1)
            # NOTE(review): the 5*5*6 flatten size presumes a 5x5 feature map
            # after pooling -- confirm against FLAGS.node_dim.
            h_flat = tf.reshape(h_conv1, [-1, 5*5*6]) # [batch, input]
            # lstm input should be [batch*n_step, input_size], h_flat need n_step copy
            # NOTE(review): six copies are hard-coded here while the split below
            # uses self.odnum -- presumably odnum == 6; confirm.
            y = tf.stack([h_flat,h_flat,h_flat,h_flat,h_flat,h_flat], axis=0)

            y = tf.reshape(y, [-1, self.input_size])
            self.y = tf.split(axis=0, num_or_size_splits=self.odnum, value=y)
            # lstm cell (forward and backward directions)
            lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(self.a_cell_size, forget_bias=1.0, activation=tf.nn.softplus)
            lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(self.a_cell_size, forget_bias=1.0, activation=tf.nn.softplus)

            self.outputs_a, _, _ = tf.contrib.rnn.static_bidirectional_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=self.y, dtype=tf.float32)

            # final output: both heads map 2*a_cell_size features to one value
            # and are initialised from the same truncated-normal tensor
            initializer = tf.truncated_normal(shape=[2*self.a_cell_size,1],stddev=0.01)

            self.weight_outmu = tf.get_variable(name='mu',initializer=initializer,trainable=trainable)
            bias_outmu = tf.constant(0.01)

            self.weight_outsigma = tf.get_variable(name='sigma',initializer= initializer ,trainable=trainable)
            bias_outsigma = tf.constant(0.01)

            # Mean head: tanh of a linear map applied to every step's output.
            transformed_outputsmu = [tf.nn.tanh(tf.matmul(output, self.weight_outmu)+bias_outmu)  for output in self.outputs_a]
            mu = tf.concat(transformed_outputsmu,0)
            mu = tf.reshape(mu,[-1, self.odnum])

            # Scale head: softplus keeps the scale positive.
            transformed_outputsigma = [tf.nn.softplus(tf.matmul(output, self.weight_outsigma)+bias_outsigma)  for output in self.outputs_a]
            sigma = tf.concat(transformed_outputsigma,0)
            sigma = tf.reshape(sigma,[-1, self.odnum])

            # Flatten to column vectors so that each row parameterises one distribution.
            self.mu_reshape = tf.reshape(mu, shape=(-1,1))
            self.sigma_reshape  = tf.reshape(sigma, shape=(-1,1))

            norm_dist = Normal(loc=self.mu_reshape, scale=self.sigma_reshape) # each element of the [?, 1] tensors parameterises its own distribution

        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
        return norm_dist, params
Ejemplo n.º 28
0
    def gmm_log_pi(self, log_weights, mu, log_std):
        """Sample a squashed action from a Gaussian mixture and score it.

        One component index is drawn per row from ``log_weights`` and one
        candidate is drawn from every component; the candidate of the chosen
        component becomes ``z``. The log-policy is the log-sum-exp of the
        component log-densities plus log-weights, normalised by the
        log-sum-exp of the weights, corrected for the squashing, and scaled
        by ``self._temp``.

        Returns ``(log_pi[:, None], action)``.
        """
        components = Normal(mu, tf.exp(log_std))

        # sample from GMM (no gradient flows through the draws)
        chosen_idx = tf.stop_gradient(
            tf.multinomial(logits=log_weights, num_samples=1))
        candidates = tf.stop_gradient(components.sample())
        selector = tf.one_hot(chosen_idx[:, 0], depth=self._actor.K)
        z = tf.reduce_sum(candidates * selector[:, :, None], axis=1)
        action = self.squash_action(z)

        # calculate log policy
        component_log_prob = components.log_prob(z[:, None, :])
        unnormalised = tf.reduce_logsumexp(
            component_log_prob + log_weights, axis=-1)
        weight_normaliser = tf.reduce_logsumexp(log_weights, axis=-1)
        log_pi = unnormalised - weight_normaliser
        log_pi = log_pi - self.get_squash_correction(z)
        log_pi = log_pi * self._temp

        return log_pi[:, None], action
Ejemplo n.º 29
0
def sample_posterior(shape, name, prior, is_training):
    """
        Get a sample from the multivariate posterior.

        Creates or reuses ``<name>_mean`` and ``<name>_rho`` under the
        ``BBB`` variable scope. Outside training the mean is returned
        directly. During training the weights are sampled as
        ``mu + (softplus(rho) + 1e-5) * eps``, histogram summaries are
        emitted, and the layer's KL term is appended to the ``'KL_layers'``
        collection.
    """

    def _inverse_softplus(value):
        # rho such that softplus(rho) == value
        return math.log(math.exp(value) - 1.0)

    # rho is initialised so that sigma starts between sigma_mix/4 and sigma_mix/2.
    rho_max_init = _inverse_softplus(prior.sigma_mix / 2.0)
    rho_min_init = _inverse_softplus(prior.sigma_mix / 4.0)
    rho_initializer = tf.random_uniform_initializer(rho_min_init, rho_max_init)

    reuse_vars = not is_training

    with tf.variable_scope("BBB", reuse=reuse_vars):
        mu = tf.get_variable(name + "_mean", shape=shape, dtype=data_type())

    with tf.variable_scope("BBB", reuse=reuse_vars):
        rho = tf.get_variable(name + "_rho",
                              shape=shape,
                              dtype=data_type(),
                              initializer=rho_initializer)

    # Evaluation uses the posterior mean; nothing else is added to the graph.
    if not is_training:
        return mu

    epsilon = Normal(0.0, 1.0).sample(shape)
    sigma = tf.nn.softplus(rho) + 1e-5
    output = mu + sigma * epsilon

    tf.summary.histogram(name + '_rho_hist', rho)
    tf.summary.histogram(name + '_mu_hist', mu)
    tf.summary.histogram(name + '_sigma_hist', sigma)

    flat_mu = tf.reshape(mu, [-1])
    flat_sigma = tf.reshape(sigma, [-1])
    kl = get_kl_divergence(shape, flat_mu, flat_sigma, prior, output)
    tf.add_to_collection('KL_layers', kl)

    return output
Ejemplo n.º 30
0
 def _log_prob1(mean, std, targets):
     """Return the log-density of ``targets`` under Normal(mean, std)."""
     return Normal(loc=mean, scale=std).log_prob(targets)