def build_score_loss_entropy(self):
    r"""Build loss function whose automatic differentiation yields a
    stochastic gradient of

    .. math::

        -ELBO = -( E_{q(z; \lambda)} [ \log p(x, z) ] + H(q(z; \lambda)) )

    using the score function estimator (Paisley et al., 2012). The
    entropy :math:`H(q)` is assumed to be available analytically; the
    remaining expectation is approximated by Monte Carlo samples drawn
    from :math:`q(z; \lambda)`.
    """
    x = self.data
    # Sample from q; gradients are blocked through the samples so that
    # \lambda only enters via the density \log q (the score term).
    z = self.variational.sample(self.n_samples)
    log_q = self.variational.log_prob(stop_gradient(z))
    log_p = self.model.log_prob(x, z)
    entropy = self.variational.entropy()

    # Reported objective: Monte Carlo ELBO with analytic entropy.
    self.loss = tf.reduce_mean(log_p) + entropy
    # Surrogate loss: differentiating it gives the score function
    # gradient estimator; log_p acts purely as a learning signal.
    return -(tf.reduce_mean(log_q * stop_gradient(log_p)) + entropy)
def build_score_loss_kl(self):
    r"""Build loss function whose automatic differentiation yields a
    stochastic gradient of

    .. math::

        -ELBO = -( E_{q(z; \lambda)} [ \log p(x \mid z) ]
                   - KL(q(z; \lambda) \| p(z)) )

    using the score function estimator (Paisley et al., 2012). The KL
    term is assumed to be analytic, with a standard normal prior
    :math:`p(z) = \mathcal{N}(z; 0, 1)`. The likelihood expectation is
    approximated by Monte Carlo samples from :math:`q(z; \lambda)`.
    """
    x = self.data
    # Sample from q; gradients are blocked through the samples so that
    # \lambda only enters via the density \log q (the score term).
    z = self.variational.sample(self.n_samples)
    log_q = self.variational.log_prob(stop_gradient(z))
    log_lik = self.model.log_lik(x, z)

    # Analytic KL(q || N(0, 1)), with the variational parameters
    # stacked across factors.
    loc = tf.pack([layer.loc for layer in self.variational.layers])
    scale = tf.pack([layer.scale for layer in self.variational.layers])
    kl = kl_multivariate_normal(loc, scale)

    self.loss = tf.reduce_mean(log_lik) - kl
    # Surrogate loss: score function term for the likelihood piece;
    # the KL piece is differentiated exactly.
    return -(tf.reduce_mean(log_q * stop_gradient(log_lik)) - kl)
def build_score_loss(self):
    r"""Build loss function whose automatic differentiation yields a
    stochastic gradient of

    .. math::

        -E_{q(z^1; \lambda), ..., q(z^K; \lambda)} [
            \log 1/K \sum_{k=1}^K p(x, z^k) / q(z^k; \lambda) ]

    using the score function estimator (Paisley et al., 2012).

    Computed by sampling from :math:`q(z; \lambda)` and evaluating the
    expectation using Monte Carlo sampling. Note there is a difference
    between the number of samples to approximate the outer expectation
    (`n_samples`) and the number of importance samples per expectation
    (`K`).
    """
    x = self.data
    losses = []
    surrogates = []
    for s in range(self.n_samples):
        z = self.variational.sample(self.K)
        # Block gradients through the samples: \lambda only enters via
        # the density \log q (the score term).
        q_log_prob = self.variational.log_prob(stop_gradient(z))
        p_log_prob = self.model.log_prob(x, z)
        log_w = p_log_prob - q_log_prob
        loss_s = log_mean_exp(log_w)
        losses += [loss_s]
        # BUG FIX: the surrogate must pair each draw's objective with
        # the score terms of ITS OWN importance samples. The previous
        # code used q_log_prob from the last loop iteration for every
        # loss (and shape-mismatched whenever K != n_samples).
        surrogates += [tf.reduce_sum(q_log_prob) * stop_gradient(loss_s)]

    losses = tf.pack(losses)
    # Reported objective: Monte Carlo importance-weighted lower bound.
    self.loss = tf.reduce_mean(losses)
    # Surrogate loss whose gradient is the score function estimator.
    return -tf.reduce_mean(tf.pack(surrogates))
def build_loss(self):
    r"""Build loss function whose automatic differentiation yields a
    stochastic gradient of

    .. math::

        KL( p(z | x) || q(z) )
        = E_{p(z | x)} [ \log p(z | x) - \log q(z; \lambda) ]

    based on importance sampling. Computed as

    .. math::

        1/B \sum_{b=1}^B [ w_{norm}(z^b; \lambda) *
                           (\log p(x, z^b) - \log q(z^b; \lambda)) ]

    where

    .. math::

        z^b \sim q(z^b; \lambda)

        w_{norm}(z^b; \lambda) = w(z^b; \lambda) /
                                 \sum_{b=1}^B w(z^b; \lambda)

        w(z^b; \lambda) = p(x, z^b) / q(z^b; \lambda)

    which gives a gradient

    .. math::

        -1/B \sum_{b=1}^B w_{norm}(z^b; \lambda)
             \partial_{\lambda} \log q(z^b; \lambda)
    """
    x = self.data
    z = self.variational.sample(self.n_samples)

    # Self-normalized importance weights, computed in log space for
    # numerical stability. Gradients are blocked through the samples so
    # \lambda only enters via \log q.
    log_q = self.variational.log_prob(stop_gradient(z))
    log_w = self.model.log_prob(x, z) - log_q
    log_w_norm = log_w - log_sum_exp(log_w)
    w_norm = tf.exp(log_w_norm)

    # Reported objective: importance-sampled KL(p || q) estimate.
    self.loss = tf.reduce_mean(w_norm * log_w)
    # Surrogate loss: the stopped weights act as per-sample learning
    # signals on the score terms \log q.
    return -tf.reduce_mean(log_q * stop_gradient(w_norm))
def build_score_loss(self):
    r"""Build loss function whose automatic differentiation yields a
    stochastic gradient of

    .. math::

        -ELBO = -E_{q(z; \lambda)} [ \log p(x, z) - \log q(z; \lambda) ]

    using the score function estimator (Paisley et al., 2012).

    Computed by sampling from :math:`q(z; \lambda)` and evaluating the
    expectation with Monte Carlo.
    """
    x = self.data
    # Sample from q; gradients are blocked through the samples so that
    # \lambda only enters via the density \log q (the score term).
    z = self.variational.sample(self.n_samples)
    log_q = self.variational.log_prob(stop_gradient(z))
    # Per-sample ELBO integrand.
    elbo_terms = self.model.log_prob(x, z) - log_q

    # Reported objective: Monte Carlo estimate of the ELBO.
    self.loss = tf.reduce_mean(elbo_terms)
    # Surrogate loss: the stopped integrand is the per-sample learning
    # signal multiplying the score term \log q.
    return -tf.reduce_mean(log_q * stop_gradient(elbo_terms))