def test_all_finite_raises(self):
  with self.test_session():
    x = np.inf * tf.constant([-1.0, -2.0, -3.0, -4.0])
    with self.assertRaisesOpError('Inf'):
      log_mean_exp(x).eval()

    x = tf.constant([-1.0, np.nan, -3.0, -4.0])
    with self.assertRaisesOpError('NaN'):
      log_mean_exp(x).eval()
def test_log_mean_exp_2d(self):
  with self.test_session():
    x = tf.constant([[-1.0], [-2.0], [-3.0], [-4.0]])
    self.assertAllClose(log_mean_exp(x).eval(),
                        -1.9461046625586951)

    x = tf.constant([[-1.0, -2.0], [-3.0, -4.0]])
    self.assertAllClose(log_mean_exp(x).eval(),
                        -1.9461046625586951)
    self.assertAllClose(log_mean_exp(x, 0).eval(),
                        np.array([-1.5662191695169727, -2.5662191695169727]))
    self.assertAllClose(log_mean_exp(x, 1).eval(),
                        np.array([-1.3798854930417224, -3.3798854930417224]))
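# The tests above assume a `log_mean_exp` helper. Below is a minimal sketch of
# such a helper, not necessarily the library's actual implementation: a
# numerically stable log(mean(exp(x))) over all elements or over one axis,
# with `tf.check_numerics` so that Inf/NaN inputs raise an op error, as
# `test_all_finite_raises` expects.
import tensorflow as tf

def log_mean_exp(x, axis=None):
  x = tf.check_numerics(x, message='log_mean_exp')
  # Subtract the max before exponentiating for numerical stability.
  x_max = tf.reduce_max(x, axis, keep_dims=True)
  out = tf.log(tf.reduce_mean(tf.exp(x - x_max), axis, keep_dims=True)) + x_max
  if axis is None:
    return tf.squeeze(out)  # reduced over all elements: scalar result
  return tf.squeeze(out, [axis])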
def build_reparam_loss(self):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the reparameterization trick (Kingma and Welling, 2014).

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.

  Note the distinction between the number of Monte Carlo samples used
  to approximate the outer expectation (`n_samples`) and the number of
  importance samples `K` used inside each expectation.
  """
  x = self.data
  losses = []
  for s in range(self.n_samples):
    z = self.variational.sample(self.K)
    p_log_prob = self.model.log_prob(x, z)
    q_log_prob = self.variational.log_prob(z)
    log_w = p_log_prob - q_log_prob
    losses += [log_mean_exp(log_w)]

  losses = tf.pack(losses)
  self.loss = tf.reduce_mean(losses)
  return -self.loss
def build_reparam_loss(self):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the reparameterization trick (Kingma and Welling, 2014).

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.

  Note the distinction between the number of Monte Carlo samples used
  to approximate the outer expectation (`n_minibatch`) and the number of
  importance samples `K` used inside each expectation.
  """
  x = self.data
  losses = []
  for s in range(self.n_minibatch):
    z = self.variational.sample(self.K)
    p_log_prob = self.model.log_prob(x, z)
    q_log_prob = self.variational.log_prob(z)
    log_w = p_log_prob - q_log_prob
    losses += [log_mean_exp(log_w)]

  losses = tf.pack(losses)
  self.loss = tf.reduce_mean(losses)
  return -self.loss
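# A minimal usage sketch for the builders above. `inference` is a
# hypothetical instance of the class these methods belong to; only the
# method name comes from the code above. The returned tensor is the
# negative importance-weighted bound, i.e. a quantity to minimize:
loss = inference.build_reparam_loss()
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)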
def build_score_loss(self):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the score function estimator (Paisley et al., 2012).

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.

  Note the distinction between the number of Monte Carlo samples used
  to approximate the outer expectation (`n_samples`) and the number of
  importance samples `K` used inside each expectation.
  """
  x = self.data
  losses = []
  q_log_probs = []
  for s in range(self.n_samples):
    z = self.variational.sample(self.K)
    p_log_prob = self.model.log_prob(x, z)
    q_log_prob = self.variational.log_prob(tf.stop_gradient(z))
    log_w = p_log_prob - q_log_prob
    losses += [log_mean_exp(log_w)]
    # Sum log q over the K importance samples, keeping one term per outer
    # sample so the surrogate below pairs each loss with its own samples.
    q_log_probs += [tf.reduce_sum(q_log_prob)]

  losses = tf.pack(losses)
  q_log_probs = tf.pack(q_log_probs)
  self.loss = tf.reduce_mean(losses)
  return -tf.reduce_mean(q_log_probs * tf.stop_gradient(losses))
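# A quick numpy-only sanity check of the score function identity the
# estimator above relies on,
#   grad_mu E[f(z)] = E[f(z) * grad_mu log q(z; mu)],
# for q = N(mu, 1) and f(z) = z^2, where the true gradient of
# E[z^2] = mu^2 + 1 is 2 * mu. (Illustration only; not part of the library.)
import numpy as np

np.random.seed(0)
mu = 0.5
z = np.random.normal(mu, 1.0, size=200000)
score = z - mu                  # grad_mu log N(z; mu, 1)
print(np.mean(z ** 2 * score))  # approximately 2 * mu = 1.0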
def build_loss_and_gradients(self, var_list):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the score function estimator (Paisley et al., 2012).

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.

  Note the distinction between the number of Monte Carlo samples used
  to approximate the outer expectation (`n_samples`) and the number of
  importance samples `K` used inside each expectation.
  """
  x = self.data
  # Form n_samples x K matrix of log importance weights.
  log_w = []
  q_log_probs = []
  for s in range(self.n_samples * self.K):
    z_sample = {}
    q_log_prob = 0.0
    for z, qz in six.iteritems(self.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope='inference_' + str(s))
      z_sample[z] = qz_copy.value()
      q_log_prob += tf.reduce_sum(
          qz.log_prob(tf.stop_gradient(z_sample[z])))

    p_log_prob = self.model_wrapper.log_prob(x, z_sample)
    q_log_probs += [q_log_prob]
    log_w += [p_log_prob - q_log_prob]

  log_w = tf.reshape(log_w, [self.n_samples, self.K])
  # Pair each loss with the summed log q of its own K samples, rather than
  # reusing q_log_prob from the last loop iteration only.
  q_log_prob = tf.reduce_sum(
      tf.reshape(q_log_probs, [self.n_samples, self.K]), 1)
  # Take log mean exp across importance weights (columns).
  losses = log_mean_exp(log_w, 1)
  loss = -tf.reduce_mean(losses)

  if var_list is None:
    var_list = tf.trainable_variables()

  grads = tf.gradients(
      -tf.reduce_mean(q_log_prob * tf.stop_gradient(losses)),
      [v.ref() for v in var_list])
  grads_and_vars = list(zip(grads, var_list))
  return loss, grads_and_vars
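# The estimator above returns the loss and a (gradient, variable) list in
# the format tf.train optimizers consume directly. A sketch; `inference`
# is a hypothetical instance of the enclosing class:
loss, grads_and_vars = inference.build_loss_and_gradients(var_list=None)
train_op = tf.train.AdamOptimizer(1e-2).apply_gradients(grads_and_vars)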
def build_loss_and_gradients(self, var_list):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the reparameterization trick.
  """
  # Form vector of K log importance weights.
  log_w = []
  for k in range(self.K):
    scope = 'inference_' + str(id(self)) + '/' + str(k)
    z_sample = {}
    q_log_prob = 0.0
    for z, qz in six.iteritems(self.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope=scope)
      z_sample[z] = qz_copy
      q_log_prob += tf.reduce_sum(qz_copy.log_prob(qz_copy))

    p_log_prob = 0.0
    for z in six.iterkeys(self.latent_vars):
      # Copy p(z), swapping its conditioning set with samples
      # from the variational distribution.
      z_copy = copy(z, z_sample, scope=scope)
      p_log_prob += tf.reduce_sum(z_copy.log_prob(z_sample[z]))

    for x, qx in six.iteritems(self.data):
      if isinstance(x, RandomVariable):
        # Copy p(x | z), swapping its conditioning set with samples
        # from the variational distribution.
        x_copy = copy(x, z_sample, scope=scope)
        p_log_prob += tf.reduce_sum(x_copy.log_prob(qx))

    log_w += [p_log_prob - q_log_prob]

  loss = -log_mean_exp(log_w)
  grads = tf.gradients(loss, [v.ref() for v in var_list])
  grads_and_vars = list(zip(grads, var_list))
  return loss, grads_and_vars
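# A sketch of the dictionaries this method iterates over. Keys of
# `latent_vars` are model random variables and values are their variational
# approximations; keys of `data` are observed random variables and values
# are observations. The names below are hypothetical, not the library's API:
#
#   latent_vars = {z: qz}    # q(z; lambda) approximates p(z)
#   data = {x: x_train}      # bind observed values to p(x | z)
#
# With K importance samples, each qz is copied K times under distinct scopes
# so that every log weight is computed from fresh posterior samples.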
def build_reparam_loss(self):
  r"""Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -E_{q(z^1; \lambda), \ldots, q(z^K; \lambda)} \Big[
      \log \frac{1}{K} \sum_{k=1}^K \frac{p(x, z^k)}{q(z^k; \lambda)} \Big]

  based on the reparameterization trick (Kingma and Welling, 2014).

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.

  Note the distinction between the number of Monte Carlo samples used
  to approximate the outer expectation (`n_samples`) and the number of
  importance samples `K` used inside each expectation.
  """
  x = self.data
  # Form n_samples x K matrix of log importance weights.
  log_w = []
  for s in range(self.n_samples * self.K):
    z_sample = {}
    q_log_prob = 0.0
    for z, qz in six.iteritems(self.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope='inference_' + str(s))
      z_sample[z] = qz_copy.value()
      q_log_prob += tf.reduce_sum(qz.log_prob(z_sample[z]))

    p_log_prob = self.model_wrapper.log_prob(x, z_sample)
    log_w += [p_log_prob - q_log_prob]

  log_w = tf.reshape(log_w, [self.n_samples, self.K])
  # Take log mean exp across importance weights (columns).
  losses = log_mean_exp(log_w, 1)
  self.loss = tf.reduce_mean(losses)
  return -self.loss
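# A plain-numpy shape check of the reshape / log_mean_exp step above: with
# n_samples=2 and K=3 the loop produces 6 scalar log weights; reshaping to
# [n_samples, K] and reducing over axis 1 yields one importance-weighted
# bound per outer Monte Carlo sample. (Illustration only.)
import numpy as np

log_w = np.log([0.2, 0.5, 0.3, 0.1, 0.1, 0.8]).reshape(2, 3)
losses = np.log(np.mean(np.exp(log_w), axis=1))  # shape (2,)
print(losses)  # approximately [-1.0986, -1.0986]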
def test_log_mean_exp_1d(self):
  with self.test_session():
    x = tf.constant([-1.0, -2.0, -3.0, -4.0])
    self.assertAllClose(log_mean_exp(x).eval(), -1.9461046625586951)
def test_2d():
  x = tf.constant([[-1.0], [-2.0], [-3.0], [-4.0]])
  val_ed = log_mean_exp(x)
  val_true = -1.9461046625586951
  assert np.allclose(val_ed.eval(), val_true)
def test_log_mean_exp_2d(self):
  with self.test_session():
    x = tf.constant([[-1.0], [-2.0], [-3.0], [-4.0]])
    self.assertAllClose(log_mean_exp(x).eval(), -1.9461046625586951)