Example #1
    def test_monte_carlo_objective(self):
        with self.test_session() as sess:
            log_p, log_q = prepare_test_payload()

            obj = monte_carlo_objective(log_p, log_q, axis=0)
            obj_shape = obj.get_shape().as_list()
            assert_allclose(*sess.run(
                [obj, log_mean_exp(tfops, log_p - log_q, axis=0)]))

            obj_k = monte_carlo_objective(log_p, log_q, axis=0, keepdims=True)
            self.assertListEqual([1] + obj_shape, obj_k.get_shape().as_list())
            assert_allclose(*sess.run([
                obj_k,
                log_mean_exp(tfops, log_p - log_q, axis=0, keepdims=True)
            ]))
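
These tests depend on a `log_mean_exp(ops, x, ...)` helper (and a `tfops` backend namespace) that is not shown in the listing. A minimal numerically stable sketch, assuming `ops` simply exposes TensorFlow-style operations such as `reduce_max` and `reduce_mean`:

import tensorflow as tf

def log_mean_exp(ops, x, axis=None, keepdims=False):
    # log(mean(exp(x), axis)): shift by the per-axis maximum so the
    # exponentials cannot overflow, then shift back outside the log.
    x = ops.convert_to_tensor(x)
    x_max = ops.reduce_max(x, axis=axis, keepdims=True)
    out = x_max + ops.log(
        ops.reduce_mean(ops.exp(x - x_max), axis=axis, keepdims=True))
    if not keepdims:
        out = ops.squeeze(out, axis=axis)
    return out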
Example #2

def add_p_z_given_y_reg_loss(loss):
    if not config.p_z_given_y_reg:
        return loss
    n_z = config.p_z_given_y_reg_samples
    y = tf.range(config.n_clusters, dtype=tf.int32)
    prior = gaussian_mixture_prior(y=y,
                                   z_dim=config.z_dim,
                                   n_clusters=config.n_clusters,
                                   use_concrete=False)
    prior0 = Normal(mean=0., logstd=0.)
    z = prior.sample(n_z, is_reparameterized=True)

    # Note: p(y) = 1/n_clusters, which simplifies the
    #       following derivation.
    if config.p_z_given_y_reg == 'kl_p_z_given_y':
        # :math:`E_{y}[KL(p(z|y)\|p_0(z))] =
        #        E_{y,z \sim p(z|y)}[\log p(z|y) - \log p_0(z)]`
        reg_loss = tf.reduce_mean(
            prior.log_prob(z, group_ndims=1) -
            prior0.log_prob(z, group_ndims=1))
    elif config.p_z_given_y_reg == 'kl_p_z':
        # :math:`KL(E_{y}(p(z|y))\|p_0(z)) =
        #        E_{y,z \sim p(z|y)}[\log E_{y}(p(z|y)) - \log p_0(z)]`
        log_p_z = log_mean_exp(tfops,
                               prior.log_prob(z, group_ndims=1),
                               axis=-1)
        log_p0_z = prior0.log_prob(z, group_ndims=1)
        reg_loss = tf.reduce_mean(log_p_z) - tf.reduce_mean(log_p0_z)
    else:
        raise ValueError(
            'Unexpected value for config `p_z_given_y_reg`: {}'.format(
                config.p_z_given_y_reg))

    return loss + config.p_z_given_y_reg_factor * reg_loss
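
For the `kl_p_z_given_y` variant, the Monte Carlo estimate can be sanity-checked against a closed form: if `gaussian_mixture_prior` yields a diagonal Gaussian per cluster, the KL to the standard normal prior is analytic. A sketch under that assumption, with hypothetical per-cluster parameter tensors `mean` and `logstd` of shape `[n_clusters, z_dim]`:

import tensorflow as tf

def kl_p_z_given_y_closed_form(mean, logstd):
    # KL(N(mean, exp(logstd)^2) || N(0, I)) per cluster, summed over
    # the z dimensions, then averaged over the uniform y.
    var = tf.exp(2. * logstd)
    kl = 0.5 * tf.reduce_sum(var + tf.square(mean) - 1. - 2. * logstd,
                             axis=-1)
    return tf.reduce_mean(kl)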
Example #3
    def test_importance_sampling_log_likelihood(self):
        with self.test_session() as sess:
            log_p, log_q = prepare_test_payload()

            ll = importance_sampling_log_likelihood(log_p, log_q, axis=0)
            ll_shape = ll.get_shape().as_list()
            assert_allclose(
                *sess.run([ll, log_mean_exp(tfops, log_p - log_q, axis=0)]))

            ll_k = importance_sampling_log_likelihood(log_p,
                                                      log_q,
                                                      axis=0,
                                                      keepdims=True)
            self.assertListEqual([1] + ll_shape, ll_k.get_shape().as_list())
            assert_allclose(*sess.run([
                ll_k,
                log_mean_exp(tfops, log_p - log_q, axis=0, keepdims=True)
            ]))
Example #4
def monte_carlo_objective(log_joint, latent_log_prob, axis=None,
                          keepdims=False, name=None):
    """
    Derive the Monte-Carlo objective.

    .. math::

        \\mathcal{L}_{K}(\\mathbf{x};\\theta,\\phi) =
            \\mathbb{E}_{\\mathbf{z}^{(1:K)} \\sim q_{\\phi}(\\mathbf{z}|\\mathbf{x})}\\Bigg[
                \\log \\frac{1}{K} \\sum_{k=1}^K {
                    \\frac{p_{\\theta}(\\mathbf{x},\\mathbf{z}^{(k)})}
                         {q_{\\phi}(\\mathbf{z}^{(k)}|\\mathbf{x})}
                }
            \\Bigg]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: Values of :math:`\\log q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "monte_carlo_objective")

    Returns:
        tf.Tensor: The Monte Carlo objective.  Not directly usable for
            training; see :func:`iwae_estimator` for a trainable surrogate.
    """
    _require_multi_samples(axis, 'monte carlo objective')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='monte_carlo_objective',
                       values=[log_joint, latent_log_prob]):
        likelihood = log_joint - latent_log_prob
        objective = log_mean_exp(
            tfops, likelihood, axis=axis, keepdims=keepdims)
        return objective
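
A toy shape check (constructed for illustration, not from the original source): with K = 4 samples along axis 0 and a batch of 3, both inputs are `[K, batch]` tensors and the sampling axis is reduced away:

import tensorflow as tf

log_joint = tf.random_normal([4, 3])        # stand-in for log p(x, z^(k))
latent_log_prob = tf.random_normal([4, 3])  # stand-in for log q(z^(k)|x)
obj = monte_carlo_objective(log_joint, latent_log_prob, axis=0)
# obj.get_shape().as_list() == [3]; with keepdims=True it would be [1, 3]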
Example #5
def iwae_estimator(log_values, axis, keepdims=False, name=None):
    """
    Derive the gradient estimator for
    :math:`\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[\\log \\frac{1}{K} \\sum_{k=1}^K f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]`,
    by the IWAE algorithm (Burda, Y., Grosse, R. and Salakhutdinov, R., 2015).

    .. math::

        \\begin{aligned}
            &\\nabla\\,\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[\\log \\frac{1}{K} \\sum_{k=1}^K f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]
                = \\nabla \\, \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\nabla \\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg] = \\\\
                & \\quad \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\frac{\\nabla \\frac{1}{K} \\sum_{k=1}^K w_k}{\\frac{1}{K} \\sum_{i=1}^K w_i}\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\frac{\\sum_{k=1}^K w_k \\nabla \\log w_k}{\\sum_{i=1}^K w_i}\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\sum_{k=1}^K \\widetilde{w}_k \\nabla \\log w_k\\Bigg]
        \\end{aligned}

    Args:
        log_values: Log values of the target function given `z` and `x`, i.e.,
            :math:`\\log f(\\mathbf{z},\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "iwae_estimator")

    Returns:
        tf.Tensor: The surrogate for optimizing the target function
            with the IWAE gradient estimator.
    """
    _require_multi_samples(axis, 'iwae estimator')
    log_values = tf.convert_to_tensor(log_values)
    with tf.name_scope(name, default_name='iwae_estimator',
                       values=[log_values]):
        estimator = log_mean_exp(
            tfops, log_values, axis=axis, keepdims=keepdims)
        return estimator
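
The surrogate's value coincides with `monte_carlo_objective`, but its gradient is exactly the self-normalized sum derived above: differentiating `log_mean_exp` produces the normalized weights as softmax of the log-weights. A small check of that identity on toy log-weights (all names below are illustrative, not part of the library):

import tensorflow as tf

theta = tf.get_variable('theta', initializer=0.5)
log_w = theta * tf.constant([0.1, 0.7, 0.2])      # toy log w_k(theta)
surrogate = iwae_estimator(log_w, axis=0)
auto_grad = tf.gradients(surrogate, theta)[0]

# Manual form: sum_k softmax(log w)_k * d(log w_k)/d(theta).
w_tilde = tf.stop_gradient(tf.nn.softmax(log_w))
manual_grad = tf.gradients(tf.reduce_sum(w_tilde * log_w), theta)[0]
# Evaluating both under a session gives the same number.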
Example #6
def importance_sampling_log_likelihood(log_joint,
                                       latent_log_prob,
                                       axis,
                                       keepdims=False,
                                       name=None):
    """
    Compute :math:`\\log p(\\mathbf{x})` by importance sampling.

    .. math::

        \\log p(\\mathbf{x}) =
            \\log \\mathbb{E}_{q(\\mathbf{z}|\\mathbf{x})} \\Big[\\exp\\big(\\log p(\\mathbf{x},\\mathbf{z}) - \\log q(\\mathbf{z}|\\mathbf{x})\\big) \\Big]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: Values of :math:`\\log q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): Name of this operation in TensorFlow graph.
            (default "importance_sampling_log_likelihood")

    Returns:
        The computed :math:`\\log p(\\mathbf{x})`.
    """
    _require_multi_samples(axis, 'importance sampling log-likelihood')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='importance_sampling_log_likelihood',
                       values=[log_joint, latent_log_prob]):
        log_p = log_mean_exp(tfops,
                             log_joint - latent_log_prob,
                             axis=axis,
                             keepdims=keepdims)
        return log_p
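
A toy convergence check (constructed for illustration, not from the original source): take z ~ N(0, 1) and x | z ~ N(z, 1), so the exact marginal is p(x) = N(x; 0, 2). Using the prior as the proposal, the estimate approaches log p(x) as the number of samples K grows:

import numpy as np
import tensorflow as tf

x = tf.constant(1.5)
K = 100000
z = tf.random_normal([K])                            # z^(k) ~ q = N(0, 1)
log_prior = -0.5 * (np.log(2 * np.pi) + z ** 2)      # log p(z^(k))
log_lik = -0.5 * (np.log(2 * np.pi) + (x - z) ** 2)  # log p(x | z^(k))
ll = importance_sampling_log_likelihood(
    log_prior + log_lik, log_prior, axis=0)
# sess.run(ll) is close to log N(1.5; 0, 2) ~= -1.83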