def test_monte_carlo_objective(self):
    with self.test_session() as sess:
        log_p, log_q = prepare_test_payload()

        obj = monte_carlo_objective(log_p, log_q, axis=0)
        obj_shape = obj.get_shape().as_list()
        assert_allclose(*sess.run(
            [obj, log_mean_exp(tfops, log_p - log_q, axis=0)]))

        obj_k = monte_carlo_objective(log_p, log_q, axis=0, keepdims=True)
        self.assertListEqual([1] + obj_shape, obj_k.get_shape().as_list())
        assert_allclose(*sess.run([
            obj_k,
            log_mean_exp(tfops, log_p - log_q, axis=0, keepdims=True)
        ]))
def add_p_z_given_y_reg_loss(loss):
    if not config.p_z_given_y_reg:
        return loss

    n_z = config.p_z_given_y_reg_samples
    y = tf.range(config.n_clusters, dtype=tf.int32)
    prior = gaussian_mixture_prior(
        y=y,
        z_dim=config.z_dim,
        n_clusters=config.n_clusters,
        use_concrete=False
    )
    prior0 = Normal(mean=0., logstd=0.)
    z = prior.sample(n_z, is_reparameterized=True)

    # Note: p(y) = 1/n_clusters, which simplifies the following deduction.
    if config.p_z_given_y_reg == 'kl_p_z_given_y':
        # :math:`E_{y}[KL(p(z|y)\|p_0(z))] =
        #     E_{y,z \sim p(z|y)}[\log p(z|y) - \log p_0(z)]`
        reg_loss = tf.reduce_mean(
            prior.log_prob(z, group_ndims=1) -
            prior0.log_prob(z, group_ndims=1)
        )
    elif config.p_z_given_y_reg == 'kl_p_z':
        # :math:`KL(E_{y}[p(z|y)]\|p_0(z)) =
        #     E_{y,z \sim p(z|y)}[\log E_{y}[p(z|y)] - \log p_0(z)]`
        log_p_z = log_mean_exp(
            tfops, prior.log_prob(z, group_ndims=1), axis=-1)
        log_p0_z = prior0.log_prob(z, group_ndims=1)
        reg_loss = tf.reduce_mean(log_p_z) - tf.reduce_mean(log_p0_z)
    else:
        raise ValueError(
            'Unexpected value for config `p_z_given_y_reg`: {}'.format(
                config.p_z_given_y_reg))

    return loss + config.p_z_given_y_reg_factor * reg_loss
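# A minimal usage sketch of `add_p_z_given_y_reg_loss` (an illustration, not
# part of the original source): `base_loss` below is a stand-in for the model's
# real training loss, and the `config` values are hypothetical settings for the
# fields referenced above.  With `p_z_given_y_reg = 'kl_p_z_given_y'`, the
# returned loss adds `p_z_given_y_reg_factor * E_y[KL(p(z|y) || N(0, I))]`,
# estimated from `p_z_given_y_reg_samples` reparameterized samples per mixture
# component.
config.p_z_given_y_reg = 'kl_p_z_given_y'   # or 'kl_p_z'
config.p_z_given_y_reg_samples = 100
config.p_z_given_y_reg_factor = 0.001
base_loss = tf.constant(0.)                 # stand-in for the actual VAE loss
regularized_loss = add_p_z_given_y_reg_loss(base_loss)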
def test_importance_sampling_log_likelihood(self):
    with self.test_session() as sess:
        log_p, log_q = prepare_test_payload()

        ll = importance_sampling_log_likelihood(log_p, log_q, axis=0)
        ll_shape = ll.get_shape().as_list()
        assert_allclose(
            *sess.run([ll, log_mean_exp(tfops, log_p - log_q, axis=0)]))

        ll_k = importance_sampling_log_likelihood(
            log_p, log_q, axis=0, keepdims=True)
        self.assertListEqual([1] + ll_shape, ll_k.get_shape().as_list())
        assert_allclose(*sess.run([
            ll_k,
            log_mean_exp(tfops, log_p - log_q, axis=0, keepdims=True)
        ]))
def monte_carlo_objective(log_joint, latent_log_prob, axis=None,
                          keepdims=False, name=None):
    """
    Derive the Monte-Carlo objective.

    .. math::

        \\mathcal{L}_{K}(\\mathbf{x};\\theta,\\phi) =
            \\mathbb{E}_{\\mathbf{z}^{(1:K)} \\sim
                q_{\\phi}(\\mathbf{z}|\\mathbf{x})}\\Bigg[
                \\log \\frac{1}{K} \\sum_{k=1}^K {
                    \\frac{p_{\\theta}(\\mathbf{x},\\mathbf{z}^{(k)})}
                         {q_{\\phi}(\\mathbf{z}^{(k)}|\\mathbf{x})}
                }
            \\Bigg]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: :math:`q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions?  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "monte_carlo_objective")

    Returns:
        tf.Tensor: The Monte Carlo objective.  Not applicable for training.
    """
    _require_multi_samples(axis, 'monte carlo objective')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='monte_carlo_objective',
                       values=[log_joint, latent_log_prob]):
        likelihood = log_joint - latent_log_prob
        objective = log_mean_exp(
            tfops, likelihood, axis=axis, keepdims=keepdims)
    return objective
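# A minimal usage sketch for `monte_carlo_objective` (an illustration under the
# assumption that axis 0 is the sampling axis, i.e. `log_p` and `log_q` have
# shape [n_samples, batch_size]); the toy random tensors below are not from the
# original source.
log_p = tf.random_normal([16, 32])   # log p(x, z), with z ~ q(z|x)
log_q = tf.random_normal([16, 32])   # log q(z|x)
mc_objective = monte_carlo_objective(log_p, log_q, axis=0)   # shape: [32]
# The objective is a multi-sample lower bound of log p(x), intended for
# evaluation; for training, derive a surrogate with `iwae_estimator` instead.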
def iwae_estimator(log_values, axis, keepdims=False, name=None):
    """
    Derive the gradient estimator for
    :math:`\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[\\log
    \\frac{1}{K} \\sum_{k=1}^K f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]`,
    by IWAE (Burda, Y., Grosse, R. and Salakhutdinov, R., 2015) algorithm.

    .. math::

        \\begin{aligned}
            &\\nabla\\,\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[
                \\log \\frac{1}{K} \\sum_{k=1}^K
                f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]
            = \\nabla \\, \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[
                \\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg]
            = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\nabla
                \\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg] = \\\\
            & \\quad \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[
                \\frac{\\nabla \\frac{1}{K} \\sum_{k=1}^K w_k}
                     {\\frac{1}{K} \\sum_{i=1}^K w_i}\\Bigg]
            = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[
                \\frac{\\sum_{k=1}^K w_k \\nabla \\log w_k}
                     {\\sum_{i=1}^K w_i}\\Bigg]
            = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[
                \\sum_{k=1}^K \\widetilde{w}_k \\nabla \\log w_k\\Bigg]
        \\end{aligned}

    Args:
        log_values: Log values of the target function given `z` and `x`, i.e.,
            :math:`\\log f(\\mathbf{z},\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions?  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "iwae_estimator")

    Returns:
        tf.Tensor: The surrogate for optimizing the target function
            with IWAE gradient estimator.
    """
    _require_multi_samples(axis, 'iwae estimator')
    log_values = tf.convert_to_tensor(log_values)
    with tf.name_scope(name,
                       default_name='iwae_estimator',
                       values=[log_values]):
        estimator = log_mean_exp(
            tfops, log_values, axis=axis, keepdims=keepdims)
    return estimator
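# A minimal training sketch for `iwae_estimator` (assumptions for illustration:
# it reuses the toy `log_p` and `log_q` tensors from the sketch above, with the
# sampling axis at dimension 0, and presumes the latent samples are
# reparameterized so gradients can flow through `log_p - log_q`).
iwae_surrogate = iwae_estimator(log_p - log_q, axis=0)   # shape: [32]
iwae_loss = -tf.reduce_mean(iwae_surrogate)
# `iwae_loss` could then be minimized with a standard optimizer, e.g.
# `tf.train.AdamOptimizer(1e-3).minimize(iwae_loss)`, provided the graph
# actually contains trainable variables.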
def importance_sampling_log_likelihood(log_joint, latent_log_prob, axis,
                                       keepdims=False, name=None):
    """
    Compute :math:`\\log p(\\mathbf{x})` by importance sampling.

    .. math::

        \\log p(\\mathbf{x}) =
            \\log \\mathbb{E}_{q(\\mathbf{z}|\\mathbf{x})}\\Big[
                \\exp\\big(\\log p(\\mathbf{x},\\mathbf{z}) -
                           \\log q(\\mathbf{z}|\\mathbf{x})\\big)
            \\Big]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: :math:`q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions?  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "importance_sampling_log_likelihood")

    Returns:
        The computed :math:`\\log p(x)`.
    """
    _require_multi_samples(axis, 'importance sampling log-likelihood')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='importance_sampling_log_likelihood',
                       values=[log_joint, latent_log_prob]):
        log_p = log_mean_exp(
            tfops, log_joint - latent_log_prob, axis=axis, keepdims=keepdims)
    return log_p
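# A minimal evaluation sketch for `importance_sampling_log_likelihood`
# (an illustration, again reusing the toy `log_p` and `log_q` tensors assumed
# above; in practice a large number of samples along `axis` is needed for the
# importance-sampling estimate of log p(x) to be reliable).
log_likelihood = importance_sampling_log_likelihood(log_p, log_q, axis=0)
mean_log_likelihood = tf.reduce_mean(log_likelihood)   # average over the batch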