def prop_mixture(Xs, mx, xx, sx):
    # comp_x = [tfd.Normal(loc=xx[j], scale=sx[j]) for j in range(n)]
    # xDist = tfd.Mixture(cat=tfd.Categorical(probs=mix_cat), components=comp_x)
    # xDist = tfd.Exponential(sx[0])
    mix_cat = [mx[0], 1.0 - mx[0]]
    # beta = tfd.Beta(2.0, 5.0)
    # beta = tfd.Normal(loc=0.0, scale=1.0)
    # bijector = tfd.bijectors.AffineScalar(shift=xx[1], scale=sx[1])
    # beta_shift = tfd.TransformedDistribution(distribution=beta, bijector=bijector, name="test")
    # cDist = tfd.Mixture(cat=tfd.Categorical(probs=mix_cat),
    #                     components=[tfd.Normal(loc=xx[0], scale=sx[0]), beta_shift])
    p1 = tfd.Normal(loc=xx[0], scale=sx[0]).prob(Xs)
    p2 = tf.exp(-1.0 * sx[1] * (Xs - xx[0]))
    xr = tf.range(-10, 10, 0.01)
    p3 = tfd.Normal(loc=xx[0], scale=sx[0]).prob(xr)
    p4 = tf.exp(-sx[1] * (xr - xx[0]))
    integral_ = tf.reduce_sum(tf.math.multiply(p3, p4)) / 100.0
    # integral_ = 1.77 * tf.exp(mx[1] * mx[1] * 4.0)
    cDist = tf.math.multiply(p1, p2) / integral_
    cDist = p1  # tf.math.multiply(p1, p2) / integral_
    # tf.reduce_sum(prop_mixture(tf.range(-10, 10, 0.01), mvars, xvars, svars) * 0.01)
    return cDist
def sample_qH(self, H):
    # H packs the posterior parameters: the first dim_h columns are the mean,
    # the remaining columns are log-variances.
    h_mu = H[:, :self.dim_h]
    h_var = tf.exp(H[:, self.dim_h:])
    qh = dist.Normal(h_mu, tf.sqrt(h_var))
    ph = dist.Normal(tf.zeros_like(h_mu), tf.ones_like(h_var))
    kl_h = dist.kl_divergence(qh, ph)
    h_sample = qh.sample()
    return h_sample, kl_h
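# Aside (not from the original source): for the diagonal Gaussian posterior above,
# the KL against the standard-normal prior has the familiar closed form
# 0.5 * (mu^2 + var - log(var) - 1) per dimension. A minimal NumPy sketch of that
# identity, with purely illustrative values:
import numpy as np

def kl_diag_gaussian_vs_std_normal(mu, var):
    """Closed-form elementwise KL(N(mu, var) || N(0, 1))."""
    return 0.5 * (np.square(mu) + var - np.log(var) - 1.0)

print(kl_diag_gaussian_vs_std_normal(np.array([0.3, -1.2]), np.array([0.5, 2.0])))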
def define_noise(batch_size_tensor, flags_obj):
    # Setup noise vector
    with tf.name_scope("LatentNoiseVector"):
        z = tfd.Normal(loc=0.0, scale=flags_obj.stddev).sample(
            sample_shape=(batch_size_tensor, flags_obj.z_dim_size))
        z_perturbed = z + tfd.Normal(loc=0.0, scale=flags_obj.stddev).sample(
            sample_shape=(batch_size_tensor, flags_obj.z_dim_size)) * 1e-5
    return z, z_perturbed
def variational_autoencoder(features, n_latent_dim=2, hidden_units=[500, 500],
                            normalizing_flow='identity', flow_n_iter=2,
                            kl_weight=1.0, random_state=123):
    features = tensor_utils.to_tensor(features, dtype=tf.float32)
    kl_weight = tensor_utils.to_tensor(kl_weight, dtype=tf.float32)
    n_features = tensor_utils.get_shape(features)[1]

    with tf.variable_scope('inference_network'):
        q_mu, q_sigma = ops.gaussian_inference_network(x=features,
                                                       n_latent_dim=n_latent_dim,
                                                       hidden_units=hidden_units)
        #q_mu, q_chol = ops.mvn_inference_network(x=features,
        #                                         n_latent_dim=n_latent_dim,
        #                                         hidden_units=hidden_units)

    # set up the latent variables
    with tf.variable_scope('latent_samples'):
        with st.value_type(st.SampleValue()):
            q_z = st.StochasticTensor(dist=distributions.Normal(mu=q_mu, sigma=q_sigma),
                                      name='q_z')
            #q_z = st.StochasticTensor(
            #    dist=distributions.MultivariateNormalCholesky(
            #        mu=q_mu, chol=q_chol),
            #    name='q_z')

        # transform the sample to a more complex density by performing
        # a normalizing flow transformation
        norm_flow = flow_lib.get_flow(normalizing_flow,
                                      n_iter=flow_n_iter,
                                      random_state=random_state)
        q_z_trans, log_det_jac = norm_flow.transform(q_z, features=features)

    # set up the priors
    with tf.variable_scope('prior'):
        prior = distributions.Normal(mu=np.zeros(n_latent_dim, dtype=np.float32),
                                     sigma=np.ones(n_latent_dim, dtype=np.float32))

    with tf.variable_scope('generative_network'):
        p_x_given_z = ops.bernoulli_generative_network(z=q_z_trans,
                                                       hidden_units=hidden_units,
                                                       n_features=n_features)

    # set up elbo
    log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(features), 1)
    kl = tf.reduce_sum(distributions.kl(q_z.distribution, prior), 1)
    neg_elbo = -tf.reduce_mean(log_likelihood + log_det_jac - kl_weight * kl, 0)

    return q_mu, tf.identity(neg_elbo, name='neg_elbo')
def testKL(self):
    mu1, sd1, mu2, sd2 = [np.random.rand(4, 6) for _ in range(4)]
    pair_kl = pair_kl_divergence(mu1, sd1, mu2, sd2)
    dist1 = distributions.Normal(mu1, sd1)
    dist2 = distributions.Normal(mu2, sd2)
    kl_tf = distributions.kl_divergence(dist1, dist2)
    with tf.Session() as sess:
        kl_val = sess.run(kl_tf)
    kl_val = kl_val.sum(axis=-1)
    self.assertAllClose(np.diag(pair_kl), kl_val)
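# Aside (illustrative sketch, not the project's pair_kl_divergence): the quantity
# checked above is the analytic KL between univariate Gaussians,
#     KL(N(m1, s1^2) || N(m2, s2^2)) = log(s2/s1) + (s1^2 + (m1 - m2)^2) / (2*s2^2) - 0.5,
# summed over the last axis. A minimal NumPy version, assuming s1 and s2 are
# standard deviations (offsets below keep them strictly positive):
import numpy as np

def gaussian_kl(m1, s1, m2, s2):
    return np.log(s2 / s1) + (s1 ** 2 + (m1 - m2) ** 2) / (2.0 * s2 ** 2) - 0.5

m1, m2 = np.random.rand(4, 6), np.random.rand(4, 6)
s1, s2 = np.random.rand(4, 6) + 0.1, np.random.rand(4, 6) + 0.1
print(gaussian_kl(m1, s1, m2, s2).sum(axis=-1))  # shape (4,)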
def FC_bayes(x, shape, activation, scope, init=1e-3, bias=True):
    """
    Initializer for a Bayesian (mean-field) fully-connected layer with TensorFlow.

    Inputs:
        - x, input tensor
        - shape, (tuple), input/output size of the layer
        - activation, (string), activation function to use ('relu', 'sigmoid', 'tanh', else identity)
        - init, (float or 'xavier'), multiplier for random weight initialization
        - bias, (bool), whether to add a bias term
    """
    with tf.variable_scope(scope):
        if init == 'xavier':
            init = np.sqrt(2.0 / (shape[0] + shape[1]))
        factor = np.sqrt(2.0 / shape[0])
        init = np.log(np.exp(factor) - 1)

        W_mu = tf.Variable(tf.zeros(shape), name='W_mu')
        W_sig = tf.Variable(tf.ones(shape) * init, name='W_sig')
        W_sig = tf.log(1.0 + tf.exp(W_sig))
        W_noise = tf.placeholder(shape=shape, dtype=tf.float32, name='W_eps')

        b_mu = tf.Variable(tf.zeros([shape[1]]), name='b_mu')
        b_sig = tf.Variable(tf.ones([shape[1]]) * init, name='b_sig')
        b_sig = tf.log(1.0 + tf.exp(b_sig))
        b_noise = tf.placeholder(shape=shape[1], dtype=tf.float32, name='b_eps')

        # Reparameterized weight and bias samples.
        W_samp = W_mu + W_sig * W_noise
        b_samp = b_mu + b_sig * b_noise

        #reg = tf.log(tf.reduce_prod(W_sig)) + tf.log(tf.reduce_prod(b_sig))
        Norm_w = distributions.Normal(loc=W_mu, scale=W_sig)
        Norm_b = distributions.Normal(loc=b_mu, scale=b_sig)
        N01_w = distributions.Normal(loc=tf.zeros(shape=shape),
                                     scale=tf.ones(shape=shape) * factor)
        N01_b = distributions.Normal(loc=tf.zeros(shape=shape[1]),
                                     scale=tf.ones(shape=shape[1]) * factor)
        reg = tf.reduce_sum(distributions.kl(Norm_w, N01_w)) + \
              tf.reduce_sum(distributions.kl(Norm_b, N01_b))

        if activation == 'relu':
            activation = tf.nn.relu
        elif activation == 'sigmoid':
            activation = tf.nn.sigmoid
        elif activation == 'tanh':
            activation = tf.tanh
        else:
            activation = tf.identity

        if bias:
            h = tf.matmul(x, W_samp) + b_samp
        else:
            h = tf.matmul(x, W_samp)
        a = activation(h)
    return a, W_noise, b_noise, reg
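# Hedged usage sketch (assumes a TF 1.x graph/session and the FC_bayes defined above,
# with `distributions` being the same contrib distributions module the layer uses;
# the layer sizes and random batch are illustrative, not from the original source).
# The returned noise placeholders are fed fresh standard-normal draws on every run,
# so each forward pass uses a new reparameterized weight sample.
def _fc_bayes_usage_example():
    x_in = tf.placeholder(tf.float32, [None, 784])
    out, w_eps, b_eps, kl_reg = FC_bayes(x_in, (784, 256), 'relu', scope='fc1_example')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {x_in: np.random.rand(32, 784).astype(np.float32),
                w_eps: np.random.randn(784, 256).astype(np.float32),
                b_eps: np.random.randn(256).astype(np.float32)}
        return sess.run([out, kl_reg], feed_dict=feed)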
def _build_model(self):
    # input points
    self.x = tf.placeholder(tf.float32, shape=[None, int(np.prod(self.x_dims))], name="X")
    self.noise = tf.placeholder(tf.float32, shape=[None, self.z_dim], name="noise")
    self.p_z = dbns.Normal(loc=tf.zeros_like(self.noise), scale=tf.ones_like(self.noise))

    # encoder
    z_params = self.encoder(self.x)
    z_mu = z_params[:, self.z_dim:]
    z_sigma = tf.exp(z_params[:, :self.z_dim])
    self.q_z = dbns.Normal(loc=z_mu, scale=z_sigma)

    # reparameterization trick
    z = z_mu + tf.multiply(z_sigma, self.p_z.sample())
    # z = self.q_z.sample()

    # decoder
    out_params = self.decoder(z)
    mu = tf.nn.sigmoid(out_params[:, int(np.prod(self.x_dims)):])  # out_mu constrained to (0,1)
    sigma = tf.exp(out_params[:, :int(np.prod(self.x_dims))])
    self.x_hat = mu
    self.p_x_z = dbns.Normal(loc=mu, scale=sigma)

    nll_loss = -tf.reduce_sum(self.p_x_z.log_prob(self.x), 1)
    kl_loss = 0.5 * tf.reduce_sum(tf.square(z_mu) + tf.square(z_sigma)
                                  - tf.log(1e-8 + tf.square(z_sigma)) - 1, 1)
    # kl_loss = tf.reduce_sum(dbns.kl_divergence(self.q_z, self.p_z), 1)
    self.loss = tf.reduce_mean(nll_loss + kl_loss)
    self.elbo = -1.0 * tf.reduce_mean(nll_loss + kl_loss)

    # in original paper, lr chosen from {0.01, 0.02, 0.1} depending on first few iters training performance
    optimizer = tf.train.AdagradOptimizer(learning_rate=self.lr)
    self.train_op = optimizer.minimize(self.loss)

    # for sampling
    self.z = self.encoder(self.x, trainable=False, reuse=True)
    self.z_pl = tf.placeholder(tf.float32, shape=[None, self.z_dim])
    self.sample = self.decoder(self.z_pl, trainable=False, reuse=True)

    # tensorboard summaries
    x_img = tf.reshape(self.x, [-1] + self.x_dims)
    tf.summary.image('data', x_img)
    xhat_img = tf.reshape(self.x_hat, [-1] + self.x_dims)
    tf.summary.image('reconstruction', xhat_img)
    tf.summary.scalar('reconstruction_loss', tf.reduce_mean(nll_loss))
    tf.summary.scalar('kl_loss', tf.reduce_mean(kl_loss))
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('elbo', self.elbo)
    self.merged = tf.summary.merge_all()
def __init__(self, n_steps, cell, step_success_prob,
             where_mean=(-2., -2., 0., 0.), where_std=(1., 1., 1., 1.),
             disc_prior_type='geom', rec_where_prior=False):
    super().__init__()
    self._n_steps = n_steps
    self._cell = cell
    self._init_disc_step_success_prob = step_success_prob
    self._what_prior = tfd.Normal(0., 1.)
    self._disc_prior_type = disc_prior_type

    with self._enter_variable_scope():
        if rec_where_prior:
            init = list(where_mean) + list(where_std)
            init = {'b': tf.constant_initializer(init)}
            self._where_prior = RecurrentNormal(4, 128, conditional=True,
                                                output_initializers=init)
        else:
            self._where_prior = ConditionedNormalAdaptor(where_mean, where_std)
def create_gmm_1(d, K, name='gmm', reuse=False, scale_act=tf.nn.softplus,
                 zero_mean=False, ki=None):
    with tf.variable_scope(name, reuse=reuse):
        # tf.random_uniform_initializer(0., 3.)
        probs = tf.nn.softmax(tf.get_variable('probs', shape=[d, K], dtype=DTYPE,
                                              initializer=None), axis=-1)
        # tf.random_uniform_initializer(-.5, .5)
        locs = tf.get_variable('locs', shape=[d, K], dtype=DTYPE, initializer=None)
        if zero_mean:
            locs = tf.zeros_like(locs)
        scales = tf.get_variable('scales', shape=[d, K], dtype=DTYPE, initializer=None)

        pis = tfd.Categorical(probs=probs)
        ps = tfd.Normal(loc=locs, scale=scale_act(scales))
        p = tf.contrib.distributions.MixtureSameFamily(pis, ps)
        p = tf.contrib.distributions.Independent(p, 1)
        return p
def call_gauss(self, inputs, input_stddev, training):
    """Pass a tensor through the bottleneck.

    Args:
      inputs: The tensor to be passed through the bottleneck.
      input_stddev: Tensor of per-element standard deviations for the modeled
        Gaussian densities; must broadcast against `inputs`.
      training: Boolean. If `True`, returns a differentiable approximation of
        the inputs, and their likelihoods under the modeled probability
        densities. If `False`, returns the quantized inputs and their
        likelihoods under the corresponding probability mass function. These
        quantities can't be used for training, as they are not differentiable,
        but represent actual compression more closely.

    Returns:
      values: `Tensor` with the same shape as `inputs` containing the perturbed
        or quantized input values.
      likelihood: `Tensor` with the same shape as `inputs` containing the
        likelihood of `values` under the modeled probability distributions.

    Raises:
      ValueError: if `inputs` has different `dtype` or number of channels than
        a previous set of inputs the model was invoked with earlier.
    """
    inputs = ops.convert_to_tensor(inputs)
    input_stddev = ops.convert_to_tensor(input_stddev)
    inputs = array_ops.expand_dims(inputs, axis=4)
    input_stddev = array_ops.expand_dims(input_stddev, axis=4)
    #self.build_gauss(input_stddev)
    half = constant_op.constant(.5, dtype=self.dtype)

    # Convert to (channels, 1, batch) format by commuting channels to front
    # and then collapsing.
    values = inputs
    stddev = input_stddev

    # Add noise or quantize.
    if training:
        noise = random_ops.random_uniform(array_ops.shape(values), -half, half)
        values = math_ops.add_n([values, noise])
    elif self.optimize_integer_offset:
        values = math_ops.round(values - self._medians) + self._medians
    else:
        values = math_ops.round(values)

    mean = constant_op.constant(0., dtype=self.dtype,
                                shape=(self.n, self.h, self.w, self.c, 1))
    norm_dist = tfd.Normal(loc=mean, scale=stddev)
    likelihood = abs(norm_dist.cdf(values + half) - norm_dist.cdf(values - half))
    if self.likelihood_bound > 0:
        likelihood_bound = constant_op.constant(self.likelihood_bound, dtype=self.dtype)
        likelihood = tfc_math_ops.lower_bound(likelihood, likelihood_bound)

    if not context.executing_eagerly():
        values_shape, likelihood_shape = self.compute_output_shape(inputs.shape)
        values.set_shape(values_shape)
        likelihood.set_shape(likelihood_shape)

    values = array_ops.squeeze(values, [-1])
    likelihood = array_ops.squeeze(likelihood, [-1])
    return values, likelihood
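# Aside (illustrative, not from the original source): the likelihood above is the
# "discretized Gaussian" mass P(v) = CDF(v + 0.5) - CDF(v - 0.5). A quick NumPy/scipy
# check (scipy assumed available) that these masses form a proper pmf over a wide
# integer range for a zero-mean Gaussian with a modest scale:
import numpy as np
from scipy.stats import norm

v = np.arange(-50, 51)
mass = norm.cdf(v + 0.5, scale=2.0) - norm.cdf(v - 0.5, scale=2.0)
print(mass.sum())  # ~1.0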
def __init__(self, region, args, name, given_means=None, given_stddevs=None,
             mean=0.0, num_dims=0):
    super().__init__(name)
    self.local_size = len(region)
    self.args = args
    self.scope = sorted(list(region))
    self.size = args.num_gauss
    self.num_dims = num_dims
    self.np_means = None

    self.means = self.args.param_provider.grab_leaf_parameters(
        self.scope, args.num_gauss, name=name + "_means")

    if args.gauss_min_var < args.gauss_max_var:
        sigma_params = self.args.param_provider.grab_leaf_parameters(
            self.scope, args.num_gauss, name=name + "_sigma_params")
        self.sigma = args.gauss_min_var + (
            (args.gauss_max_var - args.gauss_min_var) * tf.sigmoid(sigma_params))
    else:
        self.sigma = 1.0

    self.dist = dists.Normal(self.means, tf.sqrt(self.sigma))
def __init__(self, series, time_step=10, batch_size=10, cell_size=100):
    scale = np.std(np.multiply(series, 2.0), dtype=np.float32)
    self.__nmFunc = dst.Normal(loc=np.mean(series, 0, np.float32), scale=scale)
    self.__meta_list = series
    self.__input_size = 1
    self.__output_size = 1
    self.__forget_bias = 0.2
    self.__batch_size = batch_size
    self.__cell_size = cell_size
    self.__time_step = time_step
    self.cursor = 0
    self.__labels = None
    self.__batches = None
    self.__input_layer_var = None
    self.__cell_state = None
    self.__out_state = None
    self.__predication = None
    self.__last_out = None
    self.__input_placeholder = tf.placeholder(dtype=tf.float32, name="Inputs")
    self.__label_placeholder = tf.placeholder(dtype=tf.float32, name="labels")
    self.__predication = np.asarray([])
    self.__generate_next_batch()

    # Flags
    self.trained = False
    self.config = tf.ConfigProto(
        device_count={"CPU": 3},  # limit to num_cpu_core CPU usage
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        log_device_placement=False)
    self.__session_holder = None
def _build_network(self):
    with tf.variable_scope('critic'):
        c_h1 = layers.fully_connected(self.obs, self.hidden_size, trainable=self.trainable)
        c_out = layers.fully_connected(c_h1, 1, activation_fn=None, trainable=self.trainable)

    with tf.variable_scope('actor'):
        a_h1 = layers.fully_connected(self.obs, self.hidden_size, trainable=self.trainable)
        a_out = layers.fully_connected(a_h1, self.num_ac, activation_fn=None,
                                       trainable=self.trainable)
        log_std = tf.get_variable('log_std', [1, self.num_ac], dtype=tf.float32,
                                  initializer=tf.constant_initializer(self.init_std),
                                  trainable=self.trainable)
        std = tf.exp(log_std)
        a_dist = dist.Normal(a_out, std)

    self.log_prob = a_dist.log_prob(self.acs)
    self.entropy = tf.reduce_mean(a_dist.entropy())
    self.value = tf.identity(c_out)
    self.action = a_dist.sample()
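# Hedged aside (not part of the original class): the log_prob, entropy and value
# attributes built above are typically combined with an advantage estimate into a
# policy-gradient surrogate loss. A TF 1.x-style sketch, kept in comments because it
# refers to `self`; the placeholders and the 0.5/0.01 coefficients are illustrative:
#   adv_ph = tf.placeholder(tf.float32, [None], name='advantages')
#   returns_ph = tf.placeholder(tf.float32, [None], name='returns')
#   pg_loss = -tf.reduce_mean(tf.reduce_sum(self.log_prob, axis=-1) * adv_ph)
#   vf_loss = tf.reduce_mean(tf.square(tf.squeeze(self.value, -1) - returns_ph))
#   total_loss = pg_loss + 0.5 * vf_loss - 0.01 * self.entropy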
def _compute_what(self, img, what_tm1, where, hidden_output,
                  temporal_hidden_state, temporal_state):
    what_distrib = self._glimpse_encoder(img, where, mask_inpt=temporal_state)[0]
    loc, scale = what_distrib.loc, what_distrib.scale
    inpt = tf.concat((hidden_output, where, loc, scale), -1)
    temporal_output, temporal_hidden_state = self._temporal_cell(inpt, temporal_hidden_state)

    n_dim = int(what_tm1.shape[-1])
    temporal_distrib = GaussianFromParamVec(n_dim)(temporal_output)

    remember_bias = {'b': tf.constant_initializer(1.)}
    gates = Nonlinear(n_dim * 3, tf.nn.sigmoid, remember_bias)(temporal_output)
    gates *= .9999
    forget_gate, input_gate, temporal_gate = tf.split(gates, 3, -1)

    what_distrib = tfd.Normal(
        loc=forget_gate * what_tm1 + (1. - input_gate) * loc
            + (1. - temporal_gate) * temporal_distrib.loc,
        scale=(1. - input_gate) * scale + (1. - temporal_gate) * temporal_distrib.scale)

    what_sample = what_distrib.sample()
    what = what_sample
    return what, what_sample, what_distrib.loc, what_distrib.scale, temporal_hidden_state
def __init__(self, config, attention, latent_space, scope='ChiSquaredSampler'):
    """ Initialize the sampler """
    super(ChiSquaredSampler, self).__init__(
        config, attention, latent_space, scope=scope)

    shape = (config.batch_size, self.sample_size)
    self.prior = distributions.Normal(tf.zeros(shape), tf.ones(shape), name='prior')
def _compute_what(self, img, what_tm1, where, hidden_output,
                  temporal_hidden_state, temporal_state):
    # Take the input image, extract a glimpse based on the `where` latent variable
    # and output the parameters of the `what` distribution.
    what_distrib = self._glimpse_encoder(img, where, mask_inpt=temporal_state)[0]
    # Split the parameters into mean and scale.
    loc, scale = what_distrib.loc, what_distrib.scale
    # Concatenate (loc, scale) with the output of the relational RNN and
    # the `where` latent variable from the current timestep.
    inpt = tf.concat((hidden_output, where, loc, scale), -1)
    # Apply the temporal RNN to get its output and hidden state.
    temporal_output, temporal_hidden_state = self._temporal_cell(inpt, temporal_hidden_state)

    n_dim = int(what_tm1.shape[-1])
    temporal_distrib = GaussianFromParamVec(n_dim)(temporal_output)

    remember_bias = {'b': tf.constant_initializer(1.)}
    gates = Nonlinear(n_dim * 3, tf.nn.sigmoid, remember_bias)(temporal_output)
    gates *= .9999
    forget_gate, input_gate, temporal_gate = tf.split(gates, 3, -1)

    # Construct the `what` distribution.
    what_distrib = tfd.Normal(
        loc=forget_gate * what_tm1 + (1. - input_gate) * loc
            + (1. - temporal_gate) * temporal_distrib.loc,
        scale=(1. - input_gate) * scale + (1. - temporal_gate) * temporal_distrib.scale
    )
    # Sample the `what` variable.
    what_sample = what_distrib.sample()
    what = what_sample
    return what, what_sample, what_distrib.loc, what_distrib.scale, temporal_hidden_state
class Experiment(object):
    _noise = tfd.Normal(0., 0.001)

    def __init__(self, env: gym.Env, use_monitor=False):
        """Experiment

        Args:
            env: OpenAI Gym environment.
            use_monitor: Whether to wrap the environment with a monitor.
        """
        self._env = env
        self._use_monitor = use_monitor
        self.episode_n = 0.

    def rollout(self, policy, random_trajectory=False):
        """Return a normalized trajectory.

        Args:
            policy: Policy that returns an action probability distribution.

        Returns:
            One trajectory.
        """
        if not isinstance(self._env.action_space, gym.spaces.Box):
            raise ValueError('This rollout is only for continuous action spaces.')
        if len(self._env.action_space.shape) > 1:
            raise NotImplementedError('Multi-dimensional actions are not implemented.')

        if not random_trajectory:
            self.episode_n += 1

        observ = self._env.reset()
        if random_trajectory:
            observ += self._noise.sample(sample_shape=observ.shape)

        trajectory = []
        for t in itertools.count():
            action_prob = policy.predict(observ)
            action = action_prob.sample()
            assert action.shape == self._env.action_space.shape
            next_observ, reward, done, _ = self._env.step(action)
            trajectory.append(transition(observ, action, next_observ, reward, done))
            observ = next_observ
            if done:
                break

        # Normalize observations.
        observs, actions, next_observs, _, _ = map(np.asarray, zip(*trajectory))
        normalize_observ = np.stack([observs, next_observs], axis=0).mean()
        normalize_action = actions.mean()
        for i, t in enumerate(trajectory):
            t = t._replace(
                observ=(t.observ / normalize_observ),
                action=(t.action / normalize_action),
                next_observ=(t.next_observ / normalize_observ),
            )
            trajectory[i] = t
        return trajectory
def loglikelihood(mean_arr, sampled_arr, sigma):
    mu = tf.stack(mean_arr)             # mu = [timesteps, batch_sz, loc_dim]
    sampled = tf.stack(sampled_arr)     # same shape as mu
    gaussian = distributions.Normal(mu, sigma)
    logll = gaussian.log_prob(sampled)  # [timesteps, batch_sz, loc_dim]
    logll = tf.reduce_sum(logll, 2)
    logll = tf.transpose(logll)         # [batch_sz, timesteps]
    return logll
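# Aside (illustrative, not from the original source): the per-element quantity summed
# above is the Gaussian log-density
#     log N(x; mu, sigma) = -0.5*((x - mu)/sigma)^2 - log(sigma) - 0.5*log(2*pi).
# A quick NumPy/scipy cross-check of that formula (scipy assumed available):
import numpy as np
from scipy.stats import norm

x, mu, sigma = 0.3, -0.1, 0.5
manual = -0.5 * ((x - mu) / sigma) ** 2 - np.log(sigma) - 0.5 * np.log(2 * np.pi)
print(np.isclose(manual, norm.logpdf(x, loc=mu, scale=sigma)))  # True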
def get_mix(m, x, s, j, nn):
    x1 = tf.Variable(x, name="x" + str(j) + str(nn))
    # x1 = tf.constant(x, name="x" + str(j) + str(nn))
    s1 = tf.Variable(s, name="s" + str(j) + str(nn))
    m1 = tf.Variable(m, name="m" + str(j) + str(nn))
    comp_1 = tfd.Normal(loc=x1, scale=s1)
    return m1, comp_1
def _compute_where(self, hidden_output):
    loc, scale = self._transform_estimator(hidden_output)
    if self._where_loc_bias is not None:
        loc += np.asarray(self._where_loc_bias).reshape((1, 4))

    scale = tf.nn.softplus(scale) + 1e-2
    where_distrib = tfd.Normal(loc, scale,
                               validate_args=self._debug,
                               allow_nan_stats=not self._debug)
    return where_distrib.sample(), loc, scale
def define_model(self, graph, sample_size=20, samples=1, recognition=None,
                 reuse=None, **kwargs):
    """
    Define a VariationalAutoencoderModel.

    For more details see Auto-Encoding Variational Bayes:
    https://arxiv.org/pdf/1312.6114v10.pdf

    Args:
        sample_size: The size of the samples from the approximate posterior.
        samples: The number of samples drawn from the approximate posterior.
        recognition: Model that generates q(z|x). Required keyword argument,
            but can also be set later on the VariationalAutoencoderModel.
        reuse: Whether to reuse variables.

    Returns:
        A VariationalAutoencoderModel
    """
    if recognition is None:
        raise TypeError('define_model() needs keyword only argument recognition')

    with tf.variable_scope('mean', reuse=reuse):
        mean = self.linear_layers(recognition.output_tensor, (sample_size), reuse=reuse)[-1]
    with tf.variable_scope('log_variance', reuse=reuse):
        log_variance = self.linear_layers(recognition.output_tensor, (sample_size), reuse=reuse)[-1]

    p_z = distributions.Normal(0.0, 1.0, name='P_z')
    q_z = distributions.Normal(mean, tf.sqrt(tf.exp(log_variance)), name='Q_z')

    posterior = tf.reduce_mean(q_z.sample(samples), 0)
    kl_divergence = tf.reduce_sum(distributions.kl(q_z, p_z), 1)
    return VariationalAutoencoderModel(graph, recognition, posterior, kl_divergence)
def get_data_normal_distribution_arguments():
    # Lazily build and cache the Normal distribution over the discounted returns.
    try:
        res = self.nm_dst
    except AttributeError:
        mean = tf.reduce_mean(r1 + tf.multiply(gamma, r2))
        scl = tf.sqrt(self.__get_variance(r1 + tf.multiply(gamma, r2)))
        self.nm_dst = dst.Normal(loc=mean, scale=scl)
        res = self.nm_dst
    return res
def _z(self, arg, is_prior):
    mean = self._linear(arg, self.z_size)
    stddev = self._linear(arg, self.z_size)
    stddev = tf.sqrt(tf.exp(stddev))
    epsilon = tf.random_normal(shape=[self.batch_size, self.z_size])
    z = mean if is_prior else mean + tf.multiply(stddev, epsilon)
    pdf_z = ds.Normal(loc=mean, scale=stddev)
    return z, pdf_z
def log_prob_of_improv(self, kernel, gp_sampled_x, gp_sampled_y, new_points):
    mu = tf.reshape(self.mean(kernel, gp_sampled_x, gp_sampled_y, new_points), [-1])
    sigma = tf.diag_part(self.cov(kernel, gp_sampled_x, new_points))
    non_zero_variance = tf.greater(sigma, 0., name="variance_Control_Op")
    sigma_safe = tf.where(non_zero_variance, sigma,
                          tf.tile(tf.constant([1.]), tf.shape(sigma)))
    normal_distribution = dist.Normal(loc=mu, scale=sigma_safe)
    min_sampled_y = tf.reshape(tf.reduce_min(gp_sampled_y), [-1])
    return tf.where(non_zero_variance,
                    normal_distribution.log_cdf(min_sampled_y),
                    tf.tile(tf.constant([0.]), tf.shape(non_zero_variance)))
def _build(self, what, where=None, presence=None):
    glimpse = self._batch(self._glimpse_decoder)(what)
    canvas = self._decode(glimpse, presence, where)
    canvas, written_to_mask = self._add_mean_image(canvas, presence, where)
    output_std = (written_to_mask * self._output_std
                  + (1. - written_to_mask) * self._background_std)
    pdf = tfd.Normal(canvas, output_std)
    return pdf, glimpse
def standard_normal(points, tfdt):
    """
    Standard-normal quantile (inverse CDF) evaluated at `points`.

    :param points: probabilities in (0, 1) at which to evaluate the quantile
    :param tfdt: TensorFlow dtype for the distribution parameters
    :return: the standard-normal quantiles of `points`
    """
    _loc = tf.constant(0.0, dtype=tfdt)
    _scale = tf.constant(1.0, dtype=tfdt)
    p = tfd.Normal(loc=_loc, scale=_scale)
    return p.quantile(points)
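# Aside (illustrative, not from the original source): Normal.quantile is the inverse
# CDF (the probit function), so pushing uniform points through it yields standard-normal
# draws. A scipy cross-check of a few values (scipy assumed available):
import numpy as np
from scipy.stats import norm

u = np.array([0.1, 0.5, 0.9])
print(norm.ppf(u))  # approximately [-1.2816, 0.0, 1.2816]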
def bayesian_categorical_crossentropy_internal(true, pred_var):
    # pred_var packs the class predictions followed by the predicted variances.
    std = K.sqrt(pred_var[:, num_classes:])
    pred = pred_var[:, 0:num_classes]
    iterable = K.variable(np.ones(T))
    dist = distributions.Normal(loc=K.zeros_like(std), scale=std)
    monte_carlo_results = K.map_fn(
        gaussian_categorical_crossentropy(true, pred, dist, num_classes),
        iterable, name='monte_carlo_results')
    variance_loss = K.mean(monte_carlo_results, axis=0)
    return variance_loss
def get_z(input, batch_size, z_size, W_mean, W_stddev, b_mean, b_stddev, is_prior):
    mean = tf.tensordot(input, W_mean, axes=1) + b_mean
    stddev = tf.tensordot(input, W_stddev, axes=1) + b_stddev
    stddev = tf.sqrt(tf.exp(stddev))
    epsilon = tf.random_normal(shape=[batch_size, z_size], name='epsilon')
    z = mean if is_prior else mean + tf.multiply(stddev, epsilon)
    pdf_z = ds.Normal(loc=mean, scale=stddev)
    return z, pdf_z
def _forward(self, sample_m1, hidden_state, sample=None):
    output, state = self._rnn(sample_m1, hidden_state)
    stats = self._readout(output)
    loc, scale = tf.split(stats, 2, -1)
    scale = tf.nn.softplus(scale) + 1e-2
    pdf = tfd.Normal(loc, scale)
    if sample is None:
        sample = pdf.sample()
    return sample, loc, scale, pdf.log_prob(sample)
def gaussian_reparmeterization(logits_z, rnd_sample=None):
    '''
    The vanilla gaussian reparameterization from Kingma et. al

    z = mu + sigma * N(0, I)
    '''
    zshp = logits_z.get_shape().as_list()
    assert zshp[1] % 2 == 0
    q_sigma = 1e-6 + tf.nn.softplus(logits_z[:, 0:zshp[1] // 2])
    q_mu = logits_z[:, zshp[1] // 2:]

    # Prior
    p_z = d.Normal(loc=tf.zeros(zshp[1] // 2),
                   scale=tf.ones(zshp[1] // 2))

    with st.value_type(st.SampleValue()):
        q_z = st.StochasticTensor(d.Normal(loc=q_mu, scale=q_sigma))

    reduce_index = [1] if len(zshp) == 2 else [1, 2]
    kl = d.kl(q_z.distribution, p_z, allow_nan_stats=False)
    return [q_z, tf.reduce_sum(kl, reduce_index)]
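# Aside (illustrative, not from the original source): a minimal NumPy version of the
# same reparameterization, z = mu + sigma * N(0, I); the sample moments should approach
# the requested mean and standard deviation.
import numpy as np

rng = np.random.default_rng(0)
mu, sigma = np.array([1.0, -2.0]), np.array([0.5, 1.5])
eps = rng.standard_normal((100000, 2))
z = mu + sigma * eps
print(z.mean(axis=0), z.std(axis=0))  # close to mu and sigma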