def get_kl_divergence(shape, mu, sigma, prior, sample):
    """Return a single-sample estimate of KL(posterior || prior).

    The estimate is ``log q(sample | mu, sigma) - log p(sample)``. The prior
    term combines two zero-mean Gaussians with scales ``prior.sigma1`` and
    ``prior.sigma2``, weighted by ``prior.pi_mix`` and ``1 - prior.pi_mix``.
    Each component's log-density is summed over all entries of the flattened
    sample before the two are merged with a log-sum-exp.

    Args:
        shape: Not used by the computation; kept for the callers' signature.
        mu: Location passed to the posterior ``Normal``.
        sigma: Scale passed to the posterior ``Normal``.
        prior: Object exposing ``sigma1``, ``sigma2`` and ``pi_mix``.
        sample: Tensor drawn from the posterior; flattened before use.

    Returns:
        Scalar tensor ``log q(sample) - log p(sample)``.
    """
    flat_sample = tf.reshape(sample, [-1])

    # log q(w | mu, sigma), accumulated over every entry of the sample.
    log_q = tf.reduce_sum(Normal(mu, sigma).log_prob(flat_sample))

    # Joint log-density under each prior component plus its log mixing weight.
    weighted_components = [
        tf.reduce_sum(Normal(0.0, prior.sigma1).log_prob(flat_sample))
        + tf.log(prior.pi_mix),
        tf.reduce_sum(Normal(0.0, prior.sigma2).log_prob(flat_sample))
        + tf.log(1.0 - prior.pi_mix),
    ]
    log_p = tf.reduce_logsumexp(weighted_components)

    return log_q - log_p
def make_distribs(self, xxx_todo_changeme):
    """Turn the parameter tuple returned by `_build` into distributions.

    :param xxx_todo_changeme: 5-tuple ``(prior_where_loc, prior_where_scale,
        prior_what_loc, prior_what_scale, prop_prob_logit)``.
    :return: tuple ``(what_prior, where_prior, prop_prior)``: two ``Normal``
        distributions and one ``Bernoulli`` built from the logits.
    """
    where_loc, where_scale, what_loc, what_scale, prop_logit = xxx_todo_changeme

    what_prior = Normal(what_loc, what_scale)
    where_prior = Normal(where_loc, where_scale)

    # The trailing axis of the logits is dropped before building the Bernoulli.
    squeezed_logit = tf.squeeze(prop_logit, -1)
    prop_prior = Bernoulli(logits=squeezed_logit)

    return what_prior, where_prior, prop_prior
def get_KL_divergence_Sample(shape, mu, sigma, prior, Z):
    """Evaluate a sampled KL term between variational posterior and prior.

    Rather than computing the KL divergence in closed form, the log-densities
    of posterior and prior are evaluated at the sampled weights ``Z``:

        KL_sample = log q(Z | mu, sigma) - log p(Z)

    The posterior is ``Normal(mu, sigma)``. The prior combines two zero-mean
    Gaussians with scales ``prior.sigma1`` / ``prior.sigma2`` and weights
    ``prior.pi_mix`` / ``1 - prior.pi_mix``; each component's log-likelihood is
    summed over all weights before the log-sum-exp merges them.

    Args:
        shape: Not used by the computation; kept for the callers' signature.
        mu: Location of the variational posterior.
        sigma: Scale of the variational posterior.
        prior: Object exposing ``sigma1``, ``sigma2`` and ``pi_mix``.
        Z: Sampled weights (the hidden variables); flattened before use.

    Returns:
        Scalar tensor with the sampled KL value.
    """
    weights = tf.reshape(Z, [-1])

    posterior = Normal(mu, sigma)
    # Each prior component is paired with its mixing weight.
    prior_components = (
        (Normal(0.0, prior.sigma1), prior.pi_mix),
        (Normal(0.0, prior.sigma2), 1.0 - prior.pi_mix),
    )

    posterior_ll = tf.reduce_sum(posterior.log_prob(weights))
    weighted_lls = [
        tf.reduce_sum(component.log_prob(weights)) + tf.log(weight)
        for component, weight in prior_components
    ]
    prior_ll = tf.reduce_logsumexp(weighted_lls)

    return posterior_ll - prior_ll
def gaussian_Renyi_cdf_decoder(hidden_decoder, x_samples):
    """Build the Gaussian decoder head and its Renyi / CDF based terms.

    Creates the decoder's mean and log-variance projection variables, derives
    ``mu_decoder`` (sigmoid of an affine map) and ``std_decoder``
    (``exp(0.5 * logvar) + 1e-5``), and then builds two groups of tensors
    around the interval ``[x_samples - eps_int_inf, x_samples + eps_int_sup]``:

    * ``h_z1 = elem1 + elem2``, assembled from ``mydawson`` evaluated at the
      scaled interval bounds ``at`` / ``bt``;
    * the Gaussian log-density, density and the log of the CDF mass of the
      interval.

    Relies on module-level names: ``hidden_decoder_dim``, ``input_dim``,
    ``tf_type``, ``alpha``, ``eps_int_sup``, ``eps_int_inf``,
    ``weight_variable``, ``bias_variable`` and ``mydawson``.

    NOTE(review): ``np.sqrt((alpha - 1) / ((2 - alpha) * 2))`` is only real for
    ``1 < alpha < 2``, and ``elem2`` divides by ``alpha - 1`` -- confirm the
    range of ``alpha`` used by callers.
    NOTE(review): ``mydawson`` is presumably Dawson's integral -- confirm.

    Args:
        hidden_decoder: Tensor multiplied with the
            ``[hidden_decoder_dim, input_dim]`` projection matrices.
        x_samples: Tensor at which densities and interval bounds are evaluated.

    Returns:
        Tuple ``(std_decoder, mu_decoder, bt, at, b, a, dawson_sup, h_z1,
        elem1, elem2, log_Id, term1, term1_2, term2, rez, log_pxz, pxz,
        log_cdf2_pxz)``.
    """
    # Projection parameters for the mean and the log-variance heads.
    W_decoder_hidden_reconstr_mu = weight_variable([hidden_decoder_dim, input_dim],"W_decoder_hidden_reconstr_mu")
    b_decoder_hidden_reconstr_mu = bias_variable([input_dim],"b_decoder_hidden_reconstr_mu")
    W_decoder_hidden_reconstr_logvar = weight_variable([hidden_decoder_dim, input_dim],"W_decoder_hidden_reconstr_logvar")
    b_decoder_hidden_reconstr_logvar = bias_variable([input_dim],"b_decoder_hidden_reconstr_logvar")
    # `offset` is not referenced again inside this function.
    offset = tf.constant(1.0, shape=b_decoder_hidden_reconstr_mu.get_shape(), dtype=tf_type)
    # Constant multiplier of 1.0 applied to the pre-activation of the mean.
    param_mul = tf.constant(1.0, dtype=tf_type)
    mu_decoder = tf.sigmoid(tf.multiply(param_mul, tf.matmul(hidden_decoder, W_decoder_hidden_reconstr_mu) + b_decoder_hidden_reconstr_mu))
    logvar_decoder = tf.matmul(hidden_decoder, W_decoder_hidden_reconstr_logvar) + b_decoder_hidden_reconstr_logvar
    # The 1e-5 floor keeps the standard deviation strictly positive.
    std_decoder = tf.exp(0.5 * logvar_decoder) + 1e-5
    # Scaling factor sqrt((alpha - 1) / (2 * (2 - alpha))) / std.
    term1 = tf.divide(np.sqrt((alpha-1)/((2-alpha)*2)),std_decoder)
    # Upper (b) and lower (a) bounds of the interval around each sample.
    b = x_samples + eps_int_sup #eps_int = 0.5e-1
    a = x_samples - eps_int_inf
    # Interval bounds centred on the mean and rescaled by term1.
    bt = tf.multiply(term1, b - mu_decoder)
    at = tf.multiply(term1, a - mu_decoder)
    dawson_sup = mydawson(bt)
    # mydawson(bt) * exp(bt^2 - at^2) - mydawson(at); 1e-8 guards the log below.
    term2 = tf.multiply(mydawson(bt), tf.exp(tf.pow(bt, 2)-tf.pow(at, 2))) - mydawson(at) + 1e-8
    rez = term2
    log_term2_2 = tf.pow(at,2) + tf.log(rez)
    # Reciprocal of the term1 scaling factor.
    term1_2 = tf.divide(std_decoder,np.sqrt((alpha - 1) / ((2 - alpha) * 2)))
    log_Id = tf.log(term1_2) + log_term2_2
    elem1 = tf.log(np.sqrt(2*np.pi)*std_decoder)
    elem2 = log_Id *( (2-alpha)/(alpha-1) )
    h_z1 = elem1 + elem2
    ###############################################################################################
    # These lines are for the importance sampling estimate of log_int_px
    log_pxz = Normal(mu_decoder, std_decoder).log_prob(x_samples)
    pxz = Normal(mu_decoder, std_decoder).prob(x_samples)
    cdf_poz = Normal(mu_decoder, std_decoder).cdf(x_samples + eps_int_sup)#0.5e-1) # 2
    cdf_neg = Normal(mu_decoder, std_decoder).cdf(x_samples - eps_int_inf)#0.5e-1) # 2
    # Log of the Gaussian mass inside the interval; 1e-8 avoids log(0).
    log_cdf_components = tf.log(cdf_poz -
                                cdf_neg + 1e-8)
    log_cdf2_pxz = log_cdf_components
    #END These lines are for the importance sampling estimate of log_int_px
    ###############################################################################################
    return std_decoder, mu_decoder, bt, at, b, a,dawson_sup, h_z1, elem1, elem2, log_Id, term1, term1_2, term2, rez,log_pxz, pxz, log_cdf2_pxz
def KL_scale_mixture(shape, mu, sigma, prior, w):
    """Compute a sampled KL term for a scale-mixture Gaussian prior.

    The result is ``log q(w | mu, sigma) - log p(w)`` where the prior combines
    ``Normal(0, prior.sigma_1)`` with weight ``prior.pi`` and
    ``Normal(0, prior.sigma_2)`` with weight ``1 - prior.pi``. Each component's
    log-density is summed over all weights before the log-sum-exp.

    Args:
        shape: ``(n_unit, n_w)``; not used by the computation.
        mu: Flattened posterior location.
        sigma: Flattened posterior scale.
        prior: Object exposing ``sigma_1``, ``sigma_2`` and ``pi``.
        w: Sampled weights.

    Returns:
        Scalar tensor with the sampled KL value.
    """
    posterior = Normal(mu, sigma)
    # The posterior parameters are flat, so the sample is flattened to match.
    log_q = tf.reduce_sum(posterior.log_prob(tf.reshape(w, [-1])))

    prior_1 = Normal(0., prior.sigma_1)
    prior_2 = Normal(0., prior.sigma_2)
    part_1 = tf.reduce_sum(prior_1.log_prob(w)) + tf.log(prior.pi)
    # The second component carries the complementary weight (1 - pi) so that
    # the mixing weights sum to one; weighting both with pi was a defect.
    part_2 = tf.reduce_sum(prior_2.log_prob(w)) + tf.log(1. - prior.pi)

    log_p = tf.reduce_logsumexp(tf.stack([part_1, part_2]), axis=0)
    return log_q - log_p
def gauss_sample(gauss_params, quant_chann, use_log_scales=True):
    """Draw one Gaussian sample, clip it and quantize it.

    The mean and standard deviation come from ``mean_std_from_out_params``.
    The sample is clipped to ``[-1, 1 - 2 / quant_chann]`` and then passed to
    ``utils.cast_quantize``.

    Args:
        gauss_params: Network output forwarded to ``mean_std_from_out_params``.
        quant_chann: Number of quantization channels.
        use_log_scales: Forwarded to ``mean_std_from_out_params``.

    Returns:
        The result of ``utils.cast_quantize`` on the clipped sample.
    """
    loc, scale = mean_std_from_out_params(gauss_params, use_log_scales)
    raw_sample = Normal(loc=loc, scale=scale).sample()

    upper_bound = 1. - 2. / quant_chann
    clipped = tf.clip_by_value(raw_sample, -1., upper_bound)
    return utils.cast_quantize(clipped, quant_chann)
def get_gaussian_mixture_log_prob(cat_probs, gauss_mu, gauss_sigma):
    """Get the logarithmic p.d.f. of a Gaussian mixture model.

    Args:
        cat_probs: `1-D` tensor of categorical probabilities (expected to sum
            to one); its first dimension gives the number of components.
        gauss_mu: List of tensors, one `mu` per component, all of the same
            shape (as, e.g., `gauss_mu[0]`).
        gauss_sigma: List of tensors, one `sigma` per component. These shall
            all be positive and share the shape of `gauss_mu[0]`.

    Returns:
        The bound `log_prob` method of the resulting `Mixture`, i.e. a callable
        mapping a tensor shaped like `gauss_mu[0]` to its log-density.
        NOTE(review): which dimensions are reduced depends on the defaults of
        `Independent` -- confirm the output is the intended scalar.
    """
    categorical = Categorical(probs=cat_probs)

    components = []
    for index in range(cat_probs.shape[0]):
        gaussian = Normal(gauss_mu[index], gauss_sigma[index])
        components.append(Independent(gaussian))

    mixture = Mixture(cat=categorical, components=components)
    return mixture.log_prob
def __init__(self, policy, rate, train=True):
    """Set up the loss (and optionally the update op) for a Gaussian policy.

    Calls ``policy.setup()`` inside the ``'policy_estimator'`` variable scope,
    mirrors the policy's tensors (``X``, ``a``, ``a_pred``, ``var``) on this
    object, and defines ``loss = sum(log_prob(a) * target)``.

    :param policy: Object providing ``setup()``, ``X``, ``a``, ``a_pred`` and
        ``var``.
    :param rate: Learning rate passed to ``tf.train.RMSPropOptimizer``.
    :param train: When true, also build the optimizer and the update op.
    """
    self.rate = rate
    self.policy = policy
    with tf.variable_scope('policy_estimator'):
        self.policy.setup()
        self.X = policy.X
        self.a = policy.a
        # Fed at run time; multiplied element-wise with the log-probabilities.
        self.target = tf.placeholder(dtype='float', shape=[None, 1], name='target')
        self.a_pred = policy.a_pred
        self.var = policy.var
        # NOTE(review): `self.var` is used as the second positional argument of
        # Normal, which other call sites in this file name `scale` -- confirm
        # that `policy.var` is a standard deviation and not a variance.
        dist = Normal(self.a_pred, self.var)
        self.log_probs = dist.log_prob(self.a)
        # NOTE(review): there is no sign flip here and the optimizer below
        # applies plain gradient steps on this loss -- confirm the sign
        # convention of `target`.
        self.losses = self.log_probs * self.target
        self.loss = tf.reduce_sum(self.losses, name='loss')
        if train:
            self.opt = tf.train.RMSPropOptimizer(rate, 0.99, 0.0, 1e-6)
            self.grads_and_vars = self.opt.compute_gradients(self.loss)
            # Variables without a gradient w.r.t. the loss are dropped.
            self.grads_and_vars = [(g, v) for g, v in self.grads_and_vars if g is not None]
            self.update = self.opt.apply_gradients(self.grads_and_vars)
def loss(self):
    """Build the lower-bound objective and register scalar summaries.

    Computes the batch-mean reconstruction log-likelihood and the batch-mean
    KL divergence between ``self.q_z_given_x`` and a ``Normal(0, 1)`` prior,
    stores their difference in ``self.loss_op`` (named ``'lower_bound'``), adds
    all three tensors to the ``'losses'`` collection and emits one scalar
    summary per entry of that collection.
    """
    # Prior over the latent code: Normal(0, 1).
    prior_mean = tf.constant(0.0, dtype=tf.float32)
    prior_scale = tf.constant(1.0, dtype=tf.float32)
    latent_prior = Normal(prior_mean, prior_scale)

    # Reconstruction term: sum over axis 1, then mean over the rest.
    per_example_ll = tf.reduce_sum(self.p_x_given_z.log_prob(self.x), axis=1)
    reconstruction = tf.reduce_mean(per_example_ll,
                                    name='reconstruction_error')
    tf.add_to_collection('losses', reconstruction)

    # Regularisation term: KL(q(z|x) || p(z)), reduced the same way.
    per_example_kl = tf.reduce_sum(kl(self.q_z_given_x, latent_prior), axis=1)
    divergence = tf.reduce_mean(per_example_kl, name="kl_divergence")
    tf.add_to_collection('losses', divergence)

    self.loss_op = tf.subtract(reconstruction, divergence, name='lower_bound')
    tf.add_to_collection('losses', self.loss_op)

    # One scalar summary per tensor currently in the 'losses' collection.
    for loss_tensor in tf.get_collection('losses'):
        tf.summary.scalar(loss_tensor.op.name, loss_tensor)
def build_factor(name, prior, shape=None):
    """Instantiate an approximate posterior factor.

    Creates a Gaussian factor with variables ``loc`` and ``log_scale`` (both
    initialised from placeholders), draws one sample, maps it through the
    support transform of ``prior`` and computes the factor's ELBO contribution.

    Args:
        name: Variable scope under which the factor's variables are created.
        prior: Distribution providing ``log_prob`` and ``batch_shape``.
        shape: Shape of the factor. Only ``None`` falls back to
            ``prior.batch_shape``; an explicit empty shape is honoured.

    Returns:
        ``Factor(sample, elbo_part, (loc, log_scale),
        (init_loc, init_log_scale))``.
    """
    # `shape or ...` would also discard a legitimate falsy shape such as ().
    if shape is None:
        shape = prior.batch_shape
    float_type = gpflow.settings.float_type

    with tf.variable_scope(name):
        init_loc = tf.placeholder(float_type, shape=shape)
        init_log_scale = tf.placeholder(float_type, shape=shape)
        loc = tf.get_variable('loc', initializer=init_loc)
        log_scale = tf.get_variable('log_scale', initializer=init_log_scale)
        scale = tf.exp(log_scale, name='scale')

    # Contribution to the ELBO: sample in unconstrained space, then transform.
    transform = get_support_transform(prior)
    raw_sample = Normal(loc, scale).sample()
    sample = transform.forward(raw_sample)
    log_abs_det_jac = transform.forward_log_det_jacobian(
        raw_sample, transform.forward_min_event_ndims)
    prior_logprob = prior.log_prob(sample)
    # Element-wise Gaussian entropy: 0.5 * (1 + log(2 * pi)) + log(scale).
    entropy = 0.5 * (1.0 + np.log(2 * np.pi)) + log_scale
    elbo_part = tf.reduce_sum(prior_logprob + log_abs_det_jac + entropy)

    tensors = (loc, log_scale)
    init_tensors = (init_loc, init_log_scale)
    return Factor(sample, elbo_part, tensors, init_tensors)
def build_ard_priors(model_kernel):
    """Create the ARD priors dictionary for projected kernel hyperparameters.

    Walks the kernel tree depth-first. For every ``ProjKernel`` a precision
    factor with a ``Gamma(0.001, 0.001)`` prior is built, and the kernel's
    ``W`` receives a zero-mean ``Normal`` prior whose scale is
    ``1 / sqrt(precision sample)``. ``Combination`` kernels contribute their
    children to the traversal.

    Args:
        model_kernel: Root kernel of the model.

    Returns:
        ``(priors, extra_factors)``: priors keyed by ``kernel.W`` and precision
        factors keyed by ``'<W pathname>/precision'``.
    """
    float_type = gpflow.settings.float_type
    gamma_prior = Gamma(float_type(0.001), float_type(0.001))

    priors, extra_factors = {}, {}
    pending = [model_kernel]
    while pending:
        current = pending.pop()
        if isinstance(current, ProjKernel):
            # ARD-like prior, as in probabilistic PCA: one precision per
            # column of W.
            projection = current.W
            precision_name = projection.pathname + '/precision'
            precision_factor = build_factor(
                precision_name, gamma_prior, (1, projection.shape[1]))
            extra_factors[precision_name] = precision_factor

            prior_scale = 1. / tf.sqrt(precision_factor.sample)
            priors[projection] = Normal(float_type(0), prior_scale)
            pending.append(current.base_kernel)
        elif isinstance(current, gpflow.kernels.Combination):
            pending.extend(current.kernels)

    return priors, extra_factors
def __init__(self, keep_prob, input_dim, output_dim, placeholders,
             sparse_inputs=False, norm=True, **kwargs):
    """Create the layer's variables.

    Stores the configuration flags, a ``Normal(0, 1)`` helper distribution and
    an empty ``log_values`` list, then creates a Glorot-initialised weight
    matrix and, when ``norm`` is set, an ``offset`` (zeros) and a ``scale``
    (ones) of shape ``[1, output_dim]``. ``placeholders`` is accepted but not
    read here.
    """
    # TODO sparse inputs
    super(DetDropoutFC, self).__init__(**kwargs)

    self.keep_prob = keep_prob
    self.norm = norm
    self.sparse_inputs = sparse_inputs
    self.normal = Normal(0.0, 1.0)
    self.log_values = []

    per_output_shape = [1, output_dim]
    with tf.variable_scope(self.name + '_vars'):
        self.vars['weights'] = glorot([input_dim, output_dim], name='weights')
        if norm:
            self.vars['offset'] = zeros(per_output_shape, name='offset')
            self.vars['scale'] = ones(per_output_shape, name='scale')

    if self.logging:
        self._log_vars()
def _build(self, transition, input_encoder, glimpse_encoder, glimpse_decoder, transform_estimator, steps_predictor, kwargs):
    """Build the model. See __init__ for argument description.

    Creates the ``AIRCell``, unrolls it for ``self.max_steps`` steps with
    ``tf.nn.dynamic_rnn`` and exposes every cell output as an attribute named
    after the matching entry of ``self.cell.output_names``. Afterwards it
    derives the reshaped glimpses and canvases, the output distribution and
    the step-count statistics.

    NOTE(review): the code below reads ``self.presence``, ``self.glimpse``,
    ``self.canvas`` and ``self.presence_prob``; these are presumably set by the
    ``setattr`` loop -- confirm against ``AIRCell.output_names``.
    """
    if self.explore_eps is not None:
        # Stored as a non-trainable variable.
        self.explore_eps = tf.get_variable('explore_eps', initializer=self.explore_eps, trainable=False)
    self.cell = AIRCell(self.img_size, self.glimpse_size, self.n_appearance, transition,
                        input_encoder, glimpse_encoder, glimpse_decoder, transform_estimator,
                        steps_predictor, canvas_init=None, discrete_steps=self.discrete_steps,
                        explore_eps=self.explore_eps, debug=self.debug, **kwargs)
    initial_state = self.cell.initial_state(self.obs)
    # All-zero input sequence; it fixes the number of unrolled steps.
    dummy_sequence = tf.zeros((self.max_steps, self.batch_size, 1), name='dummy_sequence')
    outputs, state = tf.nn.dynamic_rnn(self.cell, dummy_sequence, initial_state=initial_state, time_major=True)
    # Expose each RNN output under its declared name.
    for name, output in zip(self.cell.output_names, outputs):
        setattr(self, name, output)
    # NOTE(review): the meaning of the second-to-last state entry is defined by
    # AIRCell -- confirm.
    self.final_state = state[-2]
    # Glimpses are squashed with a sigmoid and masked by presence before being
    # reshaped to (max_steps, batch_size) + glimpse_size.
    self.glimpse = tf.reshape(self.presence * tf.nn.sigmoid(self.glimpse), (
        self.max_steps, self.batch_size, ) + tuple(self.glimpse_size))
    self.canvas = tf.reshape(self.canvas, (
        self.max_steps, self.batch_size, ) + tuple(self.img_size))
    self.canvas *= self.output_multiplier
    # Canvas after the last unrolled step.
    self.final_canvas = self.canvas[-1]
    self.output_distrib = Normal(self.final_canvas, self.output_std)
    posterior_step_probs = tf.transpose(tf.squeeze(self.presence_prob))
    self.num_steps_distrib = NumStepsDistribution(posterior_step_probs)
    # Presence summed over axis 0 (the step axis of the time-major outputs).
    self.num_step_per_sample = tf.to_float(
        tf.squeeze(tf.reduce_sum(self.presence, 0)))
    self.num_step = tf.reduce_mean(self.num_step_per_sample)
    # `self.nums` is not set in this method.
    self.gt_num_steps = tf.squeeze(tf.reduce_sum(self.nums, 0))
def _build_anet(self, name, trainable):
    """Build the actor network and return its action distribution.

    A 200-unit ReLU layer on ``self.tfs`` feeds two ``A_DIM``-wide heads: a
    tanh head scaled by 2 for the mean and a softplus head for the standard
    deviation.

    :param name: Variable scope for the network.
    :param trainable: Whether the dense layers' variables are trainable.
    :return: ``(norm_dist, params)``: the ``Normal`` distribution and the
        global variables collected under ``name``.
    """
    with tf.variable_scope(name):
        hidden = tf.layers.dense(self.tfs, 200, tf.nn.relu,
                                 trainable=trainable)
        mean_head = tf.layers.dense(hidden, A_DIM, tf.nn.tanh,
                                    trainable=trainable)
        action_mean = 2 * mean_head
        action_std = tf.layers.dense(hidden, A_DIM, tf.nn.softplus,
                                     trainable=trainable)
        action_dist = Normal(loc=action_mean, scale=action_std)
    scope_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope=name)
    return action_dist, scope_variables
def make_dists_and_sample(latent_sample_seq):
    """Split a parameter tensor into Gaussians and draw one sample from each.

    The last axis of ``latent_sample_seq`` holds the means in its first half
    and the log standard deviations in its second half.

    Args:
        latent_sample_seq: Tensor whose last dimension is ``2 * latent_dim``.

    Returns:
        ``(latent_dists, sample)``: the ``Normal`` distributions and a single
        sample with the same shape as the means.
    """
    latent_dim = latent_sample_seq.get_shape().as_list()[-1] // 2
    latent_dists = Normal(loc=latent_sample_seq[..., :latent_dim],
                          scale=tf.exp(latent_sample_seq[..., latent_dim:]))
    # Only the leading sample axis is removed. An axis-less squeeze would also
    # collapse genuine size-1 dimensions such as a batch of one.
    latent_sample_seq = tf.squeeze(latent_dists.sample([1]), axis=0)
    return latent_dists, latent_sample_seq
def _gauss_log_pi(self, mu, log_sig):
    """Sample a squashed Gaussian action and its log-probability.

    :param mu: Mean of the Gaussian.
    :param log_sig: Log standard deviation of the Gaussian.
    :return: ``(log_pi[:, None], actions)``, where ``log_pi`` is the Gaussian
        log-density of the raw sample minus ``self._squash_correction`` and
        ``actions`` is the raw sample passed through ``self._squash_actions``.
    """
    policy = Normal(mu, tf.exp(log_sig))
    raw_action = policy.sample()
    squashed_action = self._squash_actions(raw_action)

    log_density = policy.log_prob(raw_action)
    corrected = log_density - self._squash_correction(raw_action)
    return corrected[:, None], squashed_action
def norm(x, sigma):
    """
    Gaussian decay. Result is 1.0 for x = 0 and decays towards 0 for |x| > sigma.

    The value is the density of ``Normal(0, sigma)`` at ``x`` divided by its
    density at 0.

    :param x: Tensor.
    :param sigma: Tensor.
    :return: Tensor.
    """
    dist = Normal(0.0, sigma)
    # `prob` is the density accessor used elsewhere in this file; the legacy
    # `pdf` alias is not available on every Distribution version.
    return dist.prob(x) / dist.prob(0.0)
def scale_network(self, mean, variance, is_sampling):
    """Produce a scale in [0, 1] and its log-probability.

    The mean is clipped to ``[0, 1]``. When ``is_sampling`` is true, Gaussian
    noise with ``stddev=variance`` is added. The result is clipped again and
    detached from the gradient.

    :param mean: Unclipped mean tensor.
    :param variance: Used as ``stddev`` of the noise and as the second
        argument of ``Normal``.
    :param is_sampling: Boolean tensor selecting the noisy branch.
    :return: ``(scale, mean, log_prob)``: the clipped sample, the *unclipped*
        input mean, and the log-probability summed over the last axis.
    """
    clipped_mean = tf.clip_by_value(mean, 0., 1.)

    def _with_noise():
        noise = tf.random_normal(tf.shape(clipped_mean), stddev=variance)
        return clipped_mean + noise

    def _without_noise():
        return clipped_mean

    drawn = tf.cond(is_sampling, _with_noise, _without_noise)
    # Clip back into range, then block gradients through the sample.
    drawn = tf.stop_gradient(tf.clip_by_value(drawn, 0., 1.))

    per_dim_log_prob = Normal(clipped_mean, variance).log_prob(drawn)
    return drawn, mean, tf.reduce_sum(per_dim_log_prob, -1)
def normal_kl(m1,s1,m2,s2, sample):
    """
    KL divergence term for the Normal distribution using MC sampling.

    Evaluates both Gaussians at ``sample`` and returns
    ``log N(sample; m2, s2) - log N(sample; m1, s1)``.
    NOTE(review): this is the negated ``p - q`` log-ratio -- confirm the sign
    convention expected by callers.

    :param m1: Location of the first Normal.
    :param s1: Scale of the first Normal.
    :param m2: Location of the second Normal.
    :param s2: Scale of the second Normal.
    :param sample: Tensor at which both log-densities are evaluated.
    :return: Tensor of element-wise log-density differences.
    """
    p_log_prob = Normal(m1, s1).log_prob(sample)
    q_log_prob = Normal(m2, s2).log_prob(sample)
    # The statements that used to follow this return were unreachable and
    # referenced names not defined in this function; they have been removed.
    return -(p_log_prob - q_log_prob)
def location_network(self, mean, variance, is_sampling):
    """Produce a location in [-1, 1] and its log-probability.

    The mean is clipped to ``[-1, 1]``. When ``is_sampling`` is true, Gaussian
    noise with ``stddev=variance`` is added. The result is clipped again and
    detached from the gradient.

    :param mean: Unclipped mean tensor.
    :param variance: Used as ``stddev`` of the noise and as the second
        argument of ``Normal``.
    :param is_sampling: Boolean tensor selecting the noisy branch.
    :return: ``(loc, mean, log_prob)``: the clipped sample, the *unclipped*
        input mean, and the log-probability summed over the last axis.
    """
    clipped_mean = tf.clip_by_value(mean, -1., 1.)

    def _with_noise():
        noise = tf.random_normal(tf.shape(clipped_mean), stddev=variance)
        return clipped_mean + noise

    def _without_noise():
        return clipped_mean

    drawn = tf.cond(is_sampling, _with_noise, _without_noise)
    # Clip back into range, then block gradients through the sample.
    drawn = tf.stop_gradient(tf.clip_by_value(drawn, -1., 1.))

    per_dim_log_prob = Normal(clipped_mean, variance).log_prob(drawn)
    return drawn, mean, tf.reduce_sum(per_dim_log_prob, -1)
def _sample(self, mu, std_dev):
    """
    Sample from parametrized Gaussian distribution.

    :param mu: Gaussian mean.
    :param std_dev: Standard deviation of the Gaussian.
    :return: Sample z, with the same shape as the distribution parameters.
    """
    z_dists = Normal(loc=mu, scale=std_dev)
    # One draw per distribution. Only the leading sample axis is squeezed so
    # that genuine size-1 dimensions (e.g. a batch of one) are preserved.
    z = tf.squeeze(z_dists.sample([1]), axis=0)
    return z
def build_weights(self):
    """Create the mean / log-std variables and a reparameterised sample.

    ``self.mean`` is initialised from ``N(0, 0.1)`` and ``self.log_std`` from
    ``N(-3, 0.1)``; ``self.sample`` is ``exp(log_std) * eps + mean`` with
    ``eps`` drawn from ``Normal(0, 1)``.

    :raises Exception: if this distribution is flagged as a prior.
    """
    if self.is_prior:
        raise Exception('Prior distribution should not be sampled from')

    mean_init = tf.random_normal(shape=self.size, mean=0., stddev=0.1)
    self.mean = tf.Variable(mean_init)
    log_std_init = tf.random_normal(shape=self.size, mean=-3., stddev=0.1)
    self.log_std = tf.Variable(log_std_init)

    # Reparameterisation: scale standard-normal noise, then shift by the mean.
    noise = Normal(0., 1.).sample(self.size)
    scaled_noise = tf.multiply(tf.exp(self.log_std), noise)
    self.sample = scaled_noise + self.mean
def mog_from_out_params(mog_params, use_log_scales):
    """Build a mixture-of-Gaussians distribution from network outputs.

    ``mog_params`` is split along axis 2 into three equal parts: mixture
    logits, means and standard-deviation parameters. Each component's mean and
    std are obtained through ``mean_std_from_out_params``.

    Args:
        mog_params: Tensor whose axis 2 has size ``3 * nr_mix``.
        use_log_scales: Forwarded to ``mean_std_from_out_params``.

    Returns:
        A ``Mixture`` of ``Normal`` components with a ``Categorical`` over the
        logits.
    """
    logit_probs, means, std_params = tf.split(
        mog_params, num_or_size_splits=3, axis=2)
    categorical = Categorical(logits=logit_probs)
    nr_mix = mog_params.get_shape().as_list()[2] // 3

    def _component(index):
        # Re-pack this component's (mean, std parameter) pair along axis 2,
        # which is the layout passed to mean_std_from_out_params.
        packed = tf.stack([means[:, :, index], std_params[:, :, index]],
                          axis=2)
        loc, scale = mean_std_from_out_params(packed, use_log_scales)
        return Normal(loc=loc, scale=scale)

    gaussians = [_component(index) for index in range(nr_mix)]
    return Mixture(cat=categorical, components=gaussians)
def __init__(self, n_params, loc_mult=1., scale_offset=0., *args, **kwargs):
    """Store the configuration and the factory for the Normal distribution.

    :param n_params: Stored as ``self._n_params``.
    :param loc_mult: Stored as ``self._loc_mult``.
    :param scale_offset: Stored as ``self._scale_offset``.
    :param args: Extra positional arguments forwarded to ``Normal``.
    :param kwargs: Extra keyword arguments forwarded to ``Normal``.
    """
    super(ParametrisedGaussian, self).__init__()
    self._n_params = n_params
    self._loc_mult = loc_mult
    self._scale_offset = scale_offset

    def _create_distrib(x, y):
        # The scale is softplus(y) plus a small floor so it stays positive.
        return Normal(x, tf.nn.softplus(y) + 1e-4, *args, **kwargs)

    self._create_distrib = _create_distrib
def _create_network(self):
    """Build the autoencoder graph and the latent-reconstruction loss.

    Two paths are created:

    * ``x -> (z_mean, z_log_sigma_sq) -> z -> x_reconstr_mean`` using the
      reparameterisation ``z = mu + sigma * eps``;
    * ``z_theta ~ N(0, 1) -> x_prime -> (z_prime_mean, z_prime_log_sigma_sq)``,
      from which ``self.cross_entropy`` is the batch mean of the negative
      log-density of ``z_theta`` under the re-encoded Gaussian.
    """
    # Initialize autoencoder network weights and biases.
    network_weights = self._initialize_weights(**self.network_architecture)

    # Recognition network: mean and log-variance of the latent Gaussian.
    self.z_mean, self.z_log_sigma_sq = self._recognition_network(
        network_weights["weights_recog"],
        network_weights["biases_recog"],
        self.x)

    # Draw one sample z from the Gaussian: z = mu + sigma * epsilon.
    n_z = self.network_architecture["n_z"]
    eps = tf.random_normal((self.batch_size, n_z), 0, 1, dtype=tf.float32)
    # tf.multiply replaces tf.mul, which was removed in TensorFlow 1.0.
    self.z = tf.add(self.z_mean,
                    tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps),
                    name='z')

    # Generator network: mean of the reconstructed input.
    self.x_reconstr_mean = self._generator_network(
        network_weights["weights_gener"],
        network_weights["biases_gener"],
        z=self.z)

    # Second path: decode a standard-normal draw and re-encode the result.
    eps = tf.random_normal((self.batch_size, n_z), 0, 1, dtype=tf.float32)
    self.z_theta = tf.add(0.0, tf.multiply(1.0, eps), name='z_theta')
    self.x_prime = self._generator_network(
        network_weights["weights_gener"],
        network_weights["biases_gener"],
        z=self.z_theta)
    self.z_prime_mean, self.z_prime_log_sigma_sq = self._recognition_network(
        network_weights["weights_recog"],
        network_weights["biases_recog"],
        self.x_prime)

    # Positional (location, scale) arguments work both with the legacy
    # mu/sigma and the current loc/scale keyword names of Normal.
    dist = Normal(self.z_prime_mean,
                  tf.sqrt(tf.exp(self.z_prime_log_sigma_sq)))
    logli = tf.reduce_sum(dist.log_prob(self.z_theta, name='x_entropy'),
                          axis=1)
    self.cross_entropy = tf.reduce_mean(-logli)
    # NOTE(review): hard-coded constant; its origin is not visible here.
    self.entropy = tf.constant(28.37)
def get_noisy_weights(shape, name, prior, is_training, rho_min_init=None,
                      rho_max_init=None):
    """Get noisy weights.

    1. Create (or reuse) the ``mu`` and ``rho`` variables under scope 'BBB'.
    2. Sample ``w = mu + softplus(rho) * eps`` when training or when the
       module-level ``inference_mode`` is ``'sample'``; otherwise use ``mu``.
    3. During training only: write histogram summaries and add the sampled KL
       term to the ``'KL_layers'`` collection.

    Args:
        shape: Shape of the weight tensor.
        name: Prefix for the variable and summary names.
        prior: Prior object; provides ``lstm_init()`` and is forwarded to
            ``KL_scale_mixture``.
        is_training: Python bool. Variables are created when true and reused
            otherwise.
        rho_min_init: Lower bound of the uniform ``rho`` initialiser.
        rho_max_init: Upper bound of the uniform ``rho`` initialiser. If
            either bound is ``None``, both come from ``prior.lstm_init()``.

    Returns:
        The weight tensor ``w``.
    """
    # Mean of the weight posterior.
    with tf.variable_scope('BBB', reuse=not is_training):
        mu = tf.get_variable(name + '_mean', shape, dtype=tf.float32)

    # rho parameterises the standard deviation through a softplus.
    if rho_min_init is None or rho_max_init is None:
        rho_min_init, rho_max_init = prior.lstm_init()
    rho_init = tf.random_uniform_initializer(rho_min_init, rho_max_init)
    with tf.variable_scope('BBB', reuse=not is_training):
        rho = tf.get_variable(name + '_rho', shape, dtype=tf.float32,
                              initializer=rho_init)

    if is_training or inference_mode == 'sample':
        epsilon = Normal(0., 1.).sample(shape)
        sigma = tf.nn.softplus(rho) + 1e-8
        w = mu + sigma * epsilon
    else:
        w = mu

    # Summaries and the KL term belong to the training graph only. The old
    # check was inverted: it returned early while training (so the KL term was
    # never registered) and fell through at evaluation time, where `sigma` may
    # not be defined.
    if not is_training:
        return w

    tf.summary.histogram(name + '_mu_hist', mu)
    tf.summary.histogram(name + '_sigma_hist', sigma)
    tf.summary.histogram(name + '_rho_hist', rho)

    kl = KL_scale_mixture(shape, tf.reshape(mu, [-1]),
                          tf.reshape(sigma, [-1]), prior, w)
    tf.add_to_collection('KL_layers', kl)
    return w
def _build_anet(self, name, trainable):
    """Build the conv + bidirectional-LSTM actor network.

    ``self.tfs`` is reshaped to an image tensor, passed through one
    weight-normalised convolution and a pooling helper, flattened, replicated
    six times and fed to a static bidirectional LSTM. Every LSTM output is
    projected to one ``mu`` (tanh) and one ``sigma`` (softplus) value; these
    parameterise the returned ``Normal``.

    NOTE(review): the input is replicated exactly six times while the split
    uses ``self.odnum`` -- presumably ``odnum == 6``; confirm.

    :param name: Variable scope for the network.
    :param trainable: Whether the created variables are trainable.
    :return: ``(norm_dist, params)``: the distribution and the global
        variables collected under ``name``.
    """
    with tf.variable_scope(name):
        # prepare input: reshape to [-1, 3, node_dim, node_dim], then move the
        # size-3 axis last
        s = tf.reshape(self.tfs, [-1, 3, FLAGS.node_dim, FLAGS.node_dim])
        s = tf.transpose(s,[0,2,3,1])
        h_conv1=conv2dWN(x=s, name='L1', num_filters=6, trainable=trainable, nonlinearity=tf.nn.tanh, ema=None, shape=[3,3,3,6])
        h_conv1 = max_poo_2x2(h_conv1)
        h_flat = tf.reshape(h_conv1, [-1, 5*5*6]) # [batch, input]
        # lstm input should be [batch*n_step, input_size]; h_flat is copied
        # once per step
        y = tf.stack([h_flat,h_flat,h_flat,h_flat,h_flat,h_flat], axis=0)
        y = tf.reshape(y, [-1, self.input_size])
        self.y = tf.split(axis=0, num_or_size_splits=self.odnum, value=y)
        # lstm cell
        lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(self.a_cell_size, forget_bias=1.0, activation=tf.nn.softplus)
        lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(self.a_cell_size, forget_bias=1.0, activation=tf.nn.softplus)
        self.outputs_a, _, _ = tf.contrib.rnn.static_bidirectional_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=self.y, dtype=tf.float32)
        # final output: the projection weights have 2 * a_cell_size rows
        # (forward and backward outputs) and a single output column
        initializer = tf.truncated_normal(shape=[2*self.a_cell_size,1],stddev=0.01)
        self.weight_outmu = tf.get_variable(name='mu',initializer=initializer,trainable=trainable)
        bias_outmu = tf.constant(0.01)
        self.weight_outsigma = tf.get_variable(name='sigma',initializer= initializer ,trainable=trainable)
        bias_outsigma = tf.constant(0.01)
        transformed_outputsmu = [tf.nn.tanh(tf.matmul(output, self.weight_outmu)+bias_outmu) for output in self.outputs_a]
        mu = tf.concat(transformed_outputsmu,0)
        # NOTE(review): the per-step results are concatenated along axis 0 and
        # then reshaped to [-1, odnum] without a transpose -- confirm the
        # resulting row layout is the intended one.
        mu = tf.reshape(mu,[-1, self.odnum])
        transformed_outputsigma = [tf.nn.softplus(tf.matmul(output, self.weight_outsigma)+bias_outsigma) for output in self.outputs_a]
        sigma = tf.concat(transformed_outputsigma,0)
        sigma = tf.reshape(sigma,[-1, self.odnum])
        self.mu_reshape = tf.reshape(mu, shape=(-1,1))
        self.sigma_reshape = tf.reshape(sigma, shape=(-1,1))
        norm_dist = Normal(loc=self.mu_reshape, scale=self.sigma_reshape) # each element of the column is the mu of one distribution
    params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
    return norm_dist, params
def gmm_log_pi(self, log_weights, mu, log_std):
    """Sample an action from a Gaussian mixture policy with its log-density.

    :param log_weights: Unnormalised log mixture weights (used as logits).
    :param mu: Component means.
    :param log_std: Component log standard deviations.
    :return: ``(log_pi[:, None], action)``. ``log_pi`` is the mixture
        log-density of the raw sample, normalised by the log-sum-exp of the
        weights, reduced by ``self.get_squash_correction`` and scaled by
        ``self._temp``; ``action`` is the raw sample passed through
        ``self.squash_action``.
    """
    components = Normal(mu, tf.exp(log_std))

    # Sample from the GMM: pick a component index and draw from every
    # component. Gradients are blocked through both draws.
    component_index = tf.stop_gradient(
        tf.multinomial(logits=log_weights, num_samples=1))
    component_draws = tf.stop_gradient(components.sample())
    selector = tf.one_hot(component_index[:, 0], depth=self._actor.K)
    # The one-hot mask keeps only the draw of the selected component.
    z = tf.reduce_sum(component_draws * selector[:, :, None], axis=1)
    action = self.squash_action(z)

    # Log policy: weighted mixture density minus normaliser and correction.
    per_component_ll = components.log_prob(z[:, None, :])
    mixture_ll = tf.reduce_logsumexp(per_component_ll + log_weights, axis=-1)
    weight_normaliser = tf.reduce_logsumexp(log_weights, axis=-1)
    squash_correction = self.get_squash_correction(z)
    log_pi = (mixture_ll - weight_normaliser - squash_correction) * self._temp
    return log_pi[:, None], action
def sample_posterior(shape, name, prior, is_training):
    """Get a sample from the multivariate posterior.

    Creates (or reuses) the ``mu`` and ``rho`` variables under scope "BBB".
    Outside training the mean is returned unchanged. During training a
    reparameterised sample ``mu + softplus(rho) * eps`` is drawn, histogram
    summaries are written and the sampled KL term is added to the
    ``'KL_layers'`` collection.

    Args:
        shape: Shape of the weight tensor.
        name: Prefix for the variable and summary names.
        prior: Prior object; ``prior.sigma_mix`` sets the ``rho`` initialiser
            range and the object is forwarded to ``get_kl_divergence``.
        is_training: Python bool. Variables are created when true and reused
            otherwise.

    Returns:
        The sampled weights (training) or ``mu`` (otherwise).
    """
    # Inverse softplus of sigma_mix / 4 and sigma_mix / 2 bound the rho init.
    rho_min_init, rho_max_init = (
        math.log(math.exp(prior.sigma_mix / divisor) - 1.0)
        for divisor in (4.0, 2.0)
    )
    init = tf.random_uniform_initializer(rho_min_init, rho_max_init)

    with tf.variable_scope("BBB", reuse=not is_training):
        mu = tf.get_variable(name + "_mean", shape=shape, dtype=data_type())
    with tf.variable_scope("BBB", reuse=not is_training):
        rho = tf.get_variable(name + "_rho", shape=shape, dtype=data_type(),
                              initializer=init)

    if not is_training:
        return mu

    epsilon = Normal(0.0, 1.0).sample(shape)
    sigma = tf.nn.softplus(rho) + 1e-5
    output = mu + sigma * epsilon

    for suffix, tensor in (('_rho_hist', rho),
                           ('_mu_hist', mu),
                           ('_sigma_hist', sigma)):
        tf.summary.histogram(name + suffix, tensor)

    kl = get_kl_divergence(shape, tf.reshape(mu, [-1]),
                           tf.reshape(sigma, [-1]), prior, output)
    tf.add_to_collection('KL_layers', kl)
    return output
def _log_prob1(mean, std, targets):
    """Return the log-density of ``targets`` under ``Normal(mean, std)``."""
    return Normal(loc=mean, scale=std).log_prob(targets)