Example #1
 def elbo_components(self, inputs, training=None, mask=None, **kwargs):
     llk, kl = super().elbo_components(inputs=inputs,
                                       mask=mask,
                                       training=training)
     P, Q = self.last_outputs
     # one beta-weighted KL term is added for every ladder layer
     n_latents = len(self.ladder_latents) // 2
     for i in range(n_latents):
         # match the posterior and prior of ladder layer i by their names
         pz = [p for p in P if f'ladder_p{i}' in p.name][0]
         qz = [q for q in Q if f'ladder_q{i}' in q.name][0]
         kl[f'kl_ladder{i}'] = self.beta * kl_divergence(
             q=qz,
             p=pz,
             analytic=self.analytic,
             free_bits=self.free_bits,
             reverse=self.reverse)
     return llk, kl
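
Each ladder layer above contributes its own beta-weighted KL term to the returned dictionary. A minimal standalone sketch of that weighting, using tensorflow_probability's analytic kl_divergence in place of the library helper (the layer count, shapes and beta below are illustrative placeholders):

# Standalone sketch of the beta-weighted per-layer KL above, written against
# tensorflow_probability's analytic kl_divergence instead of the library
# helper; beta, layer count and shapes are illustrative placeholders.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
beta = 2.0
# stand-in posteriors and matching unit-Gaussian priors for two ladder layers
Q = [tfd.Normal(loc=tf.random.normal([8, 4]), scale=1.0) for _ in range(2)]
P = [tfd.Normal(loc=tf.zeros([8, 4]), scale=1.0) for _ in range(2)]

kl = {}
for i, (qz, pz) in enumerate(zip(Q, P)):
    # closed-form KL(q||p) per dimension, summed over the latent dimension
    kl[f'kl_ladder{i}'] = beta * tf.reduce_sum(
        tfd.kl_divergence(qz, pz), axis=-1)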
Example #2
  def kl_divergence(self,
                    prior=None,
                    analytic=True,
                    sample_shape=1,
                    reverse=True):
    """ KL(q||p) where `p` is the posterior distribution returned from last
    call

    Parameters
    -----------
    prior : instance of `tensorflow_probability.Distribution`
        prior distribution of the latent
    analytic : `bool` (default=`True`). Using closed form solution for
        calculating divergence, otherwise, sampling with MCMC
    reverse : `bool`.
        If `True`, calculate `KL(q||p)` else `KL(p||q)`
    sample_shape : `int` (default=`1`)
        number of MCMC sample if `analytic=False`

    Returns
    --------
      kullback_divergence : Tensor [sample_shape, batch_size, ...]
    """
    if prior is None:
      prior = self._prior
    assert isinstance(prior, Distribution), \
      "prior must be an instance of tensorflow_probability Distribution"
    if self.posterior is None:
      raise RuntimeError(
        "DistributionDense must be called to create the distribution before "
        "calculating the kl-divergence.")

    kullback_div = kl_divergence(q=self.posterior,
                                 p=prior,
                                 analytic=bool(analytic),
                                 reverse=reverse,
                                 q_sample=sample_shape)
    if analytic:
      kullback_div = tf.expand_dims(kullback_div, axis=0)
      if isinstance(sample_shape, Number) and sample_shape > 1:
        ndims = kullback_div.shape.ndims
        kullback_div = tf.tile(kullback_div, [sample_shape] + [1] * (ndims - 1))
    return kullback_div
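
The method above returns either an exact KL or a Monte Carlo estimate depending on `analytic`, as the docstring describes. A small self-contained sketch of those two paths with tensorflow_probability (the distributions and shapes are invented for illustration):

# Sketch of the two estimation paths the docstring describes: the exact
# closed-form KL versus a Monte Carlo estimate over `sample_shape` samples.
# The distributions and shapes here are invented for illustration.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
posterior = tfd.Normal(loc=tf.random.normal([8, 4]), scale=0.5)
prior = tfd.Normal(loc=0.0, scale=1.0)

# analytic=True: exact KL(q||p), shape [8, 4]
kl_analytic = tfd.kl_divergence(posterior, prior)

# analytic=False: E_q[log q(z) - log p(z)] estimated from sample_shape samples
sample_shape = 10
z = posterior.sample(sample_shape)                     # [10, 8, 4]
kl_mcmc = posterior.log_prob(z) - prior.log_prob(z)    # [10, 8, 4]
kl_estimate = tf.reduce_mean(kl_mcmc, axis=0)          # [8, 4]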
Example #3
    def call(self, inputs):
        docs_topics_posterior = self.encoder(inputs)
        docs_topics_samples = docs_topics_posterior.sample(self.n_mcmc_samples)

        # [n_topics, n_words]
        topics_words_probs = tf.nn.softmax(self.topics_words_logits, axis=1)
        # [n_docs, n_words]
        docs_words_probs = tf.matmul(docs_topics_samples, topics_words_probs)
        output_dist = self.decoder(
            tf.clip_by_value(docs_words_probs, 1e-4, 1 - 1e-4))

        # instantiate the prior; the concentration is clipped to a
        # numerically stable range for the Dirichlet
        concentration = tf.clip_by_value(tf.nn.softplus(self.prior_logit),
                                         1e-3, 1e3)
        topics_prior = Dirichlet(concentration=concentration,
                                 name="topics_prior")

        # ELBO
        kl = kl_divergence(q=docs_topics_posterior,
                           p=topics_prior,
                           analytic=self.analytic,
                           q_sample=self.n_mcmc_samples,
                           auto_remove_independent=True)
        if self.analytic:
            kl = tf.expand_dims(kl, axis=0)
        llk = output_dist.log_prob(inputs)
        ELBO = llk - kl

        # maximizing the ELBO, hence minimizing the following loss
        self.add_loss(tf.reduce_mean(-ELBO))
        self.add_metric(tf.reduce_mean(kl), aggregation='mean', name="MeanKL")
        self.add_metric(tf.reduce_mean(-llk),
                        aggregation='mean',
                        name="MeanNLLK")

        return output_dist
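
The matmul above turns sampled document-topic proportions into per-document word probabilities. A quick shape sketch with placeholder sizes:

# Shape sketch for the reconstruction above: sampled document-topic
# proportions are mixed with topic-word probabilities via a batched matmul.
# All sizes are arbitrary placeholders.
import tensorflow as tf

n_mcmc, n_docs, n_topics, n_words = 5, 8, 10, 2000
docs_topics_samples = tf.nn.softmax(
    tf.random.normal([n_mcmc, n_docs, n_topics]), axis=-1)
topics_words_probs = tf.nn.softmax(
    tf.random.normal([n_topics, n_words]), axis=1)
# [n_mcmc, n_docs, n_topics] @ [n_topics, n_words] -> [n_mcmc, n_docs, n_words]
docs_words_probs = tf.matmul(docs_topics_samples, topics_words_probs)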
Example #4
 def kl_divergence(self,
                   analytic: bool = False,
                   reverse: bool = False,
                   free_bits: Optional[float] = None,
                   raise_not_init: bool = True) -> tf.Tensor:
     if self._disable:
         return tf.zeros((), dtype=self.dtype)
     if raise_not_init:
         if self._posterior is None:
             raise ValueError(
                 'No posterior for the hierarchical latent variable.')
         if self._prior is None:
             raise ValueError(
                 "This HierarchicalLatents haven't been called.")
     elif self._posterior is None or self._prior is None:
         return tf.zeros((), dtype=self.dtype)
     qz = self.posterior
     pz = self.prior
     kld = kl_divergence(q=qz,
                         p=pz,
                         analytic=analytic,
                         reverse=reverse,
                         free_bits=free_bits)
     return self.beta * kld
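
The `free_bits` argument forwarded above is commonly implemented as a floor on the per-dimension KL; a sketch under that assumption (the distributions and numbers are illustrative, and this is a common convention rather than necessarily the library's exact formula):

# Sketch of the usual "free bits" convention assumed for the `free_bits`
# argument: the per-dimension KL is floored at a threshold so the layer is
# not penalized once its KL drops below it.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
qz = tfd.Normal(loc=tf.random.normal([8, 16]), scale=0.8)
pz = tfd.Normal(loc=0.0, scale=1.0)

beta = 1.0
free_bits = 0.5
kld = tfd.kl_divergence(qz, pz)       # per-dimension KL, shape [8, 16]
kld = tf.maximum(kld, free_bits)      # floor each dimension at free_bits
kl_term = beta * tf.reduce_sum(kld, axis=-1)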
Example #5
 def _elbo(self, inputs, pX_Z, qZ_X, mask, training):
     org_inputs = inputs
     inputs = inputs[:len(self.output_layers)]
     if mask is None:
         if len(org_inputs) == len(self.output_layers):  # no labelled
             X_unlabelled = inputs
         else:  # all data is labelled
             X_unlabelled = [
                 tf.zeros(shape=(0, ) + i.shape[1:]) for i in inputs
             ]
     else:
         m = tf.logical_not(tf.reshape(mask, (-1, )))
         X_unlabelled = [tf.boolean_mask(i, m, axis=0) for i in inputs]
     ## prepare inputs as usual
     org_inputs, y, mask = self.prepare_inputs(org_inputs, mask)
     X_labelled = [tf.boolean_mask(i, mask, axis=0) for i in org_inputs]
     ## Normal ELBO
     llk, div = super()._elbo(org_inputs,
                              pX_Z,
                              qZ_X,
                              mask=mask,
                              training=training)
     mask = tf.reshape(mask, (-1, ))
     ### for unlabelled data
     mask_unlabelled = tf.logical_not(mask)
     pY_X = self.classify(X_unlabelled)
     probs = pY_X.probs_parameter()
     # log-likelihood
     llk_unlabelled = {}
     for name, lk in llk.items():
         lk = tf.transpose(lk)
         lk = tf.boolean_mask(lk, mask_unlabelled, axis=0)
         lk = tf.transpose(
             tf.reshape(lk, (self.n_labels, tf.shape(probs)[0], -1)))
         lk = tf.reduce_sum(lk * probs, axis=-1)
         llk_unlabelled[name + '_unlabelled'] = lk
     # kl-divergence
     div_unlabelled = {}
     for name, dv in div.items():
         dv = tf.transpose(dv)
         dv = tf.boolean_mask(dv, mask_unlabelled, axis=0)
         dv = tf.transpose(
             tf.reshape(dv, (self.n_labels, tf.shape(probs)[0], -1)))
         dv = tf.reduce_sum(dv * probs, axis=-1)
         div_unlabelled[name + '_unlabelled'] = dv
     div_unlabelled['kl_classifier'] = kl_divergence(pY_X,
                                                     self.labels.prior,
                                                     analytic=True)
     ### for labelled data, add the discriminative objective
     # log-likelihood
     llk_labelled = {
         name + '_labelled':
         tf.transpose(tf.boolean_mask(tf.transpose(lk), mask, axis=0))
         for name, lk in llk.items()
     }
     # add the classification (discrimination) loss
     y_labelled = tf.boolean_mask(y, mask, axis=0)
     pY_X = self.classify(X_labelled)
     llk_labelled['llk_classifier'] = self.alpha * pY_X.log_prob(y_labelled)
     # kl-divergence
     div_labelled = {
         name + '_labelled':
         tf.transpose(tf.boolean_mask(tf.transpose(dv), mask, axis=0))
         for name, dv in div.items()
     }
     ### merge everything
     llk = {
         k: tf.reduce_mean(v)
         for k, v in dict(**llk_unlabelled, **llk_labelled).items()
     }
     div = {
         k: tf.reduce_mean(v)
         for k, v in dict(**div_unlabelled, **div_labelled).items()
     }
     return llk, div
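
For the unlabelled branch, the example marginalizes each ELBO term over the unknown label by weighting it with the classifier's predicted probabilities. A compact sketch of that weighting (names and shapes are placeholders, simplified to two dimensions):

# Sketch of the label marginalization used in the unlabelled branch above:
# per-label ELBO terms are weighted by the classifier probabilities q(y|x)
# and summed over the label dimension.
import tensorflow as tf

n_unlabelled, n_labels = 6, 10
probs = tf.nn.softmax(tf.random.normal([n_unlabelled, n_labels]), axis=-1)
# a per-label log-likelihood term, analogous to `lk` after the reshape above
lk = tf.random.normal([n_unlabelled, n_labels])
# expectation over y ~ q(y|x): sum_y q(y|x) * llk(x, y)
llk_unlabelled = tf.reduce_sum(lk * probs, axis=-1)    # shape [n_unlabelled]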