    def optimizer_step(self, sample):
        sample_observation_initial_context, sample_action_T, sample_next_observation_T, sample_reward_T = sample
        image_probs, reward_probs = self.model.forward_multiple(
            sample_observation_initial_context, sample_action_T)

        # reward loss
        true_reward = numerical_reward_to_bit_array(
            sample_reward_T, self.reward_prediction_bits, self.use_cuda)
        reward_loss = self.reward_criterion(reward_probs, true_reward)

        # image loss
        reconstruction_loss = self.frame_criterion(image_probs,
                                                   sample_next_observation_T)

        loss = reconstruction_loss + self.reward_loss_coef * reward_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # The minimal cross entropy between distributions p and q is the entropy of p,
        # so if they are equal the loss equals the entropy of p.
        true_entropy = Bernoulli(probs=sample_next_observation_T).entropy()
        normalized_frame_loss = reconstruction_loss - true_entropy.mean()
        return (normalized_frame_loss, reward_loss), (image_probs,
                                                      reward_probs)
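
# Not part of the original project: a minimal sketch of the normalization above,
# assuming frame_criterion is a pixelwise binary cross-entropy (which the
# Bernoulli-entropy subtraction suggests). Cross entropy H(p, q) >= H(p), with
# equality when q == p, so subtracting the target entropy gives a loss that is
# zero for a perfect model (it equals KL(p || q)).
import torch
from torch.distributions import Bernoulli
from torch.nn.functional import binary_cross_entropy

target = torch.tensor([0.1, 0.5, 0.9])            # hypothetical "true" pixel probabilities
perfect_prediction = target.clone()               # a model that matches them exactly
bce = binary_cross_entropy(perfect_prediction, target)
entropy = Bernoulli(probs=target).entropy().mean()
print(torch.isclose(bce, entropy))                # tensor(True): normalized loss ~ 0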
Example #2
from torch.distributions import Bernoulli


def bald_acq(obj_samples):
    # the output of objective is of shape num_samples x batch_shape x d_out
    mean_p = obj_samples.mean(dim=0)
    posterior_entropies = Bernoulli(mean_p).entropy().squeeze(-1)
    sample_entropies = Bernoulli(obj_samples).entropy()
    conditional_entropies = sample_entropies.mean(dim=0).squeeze(-1)

    return posterior_entropies - conditional_entropies

    def optimizer_step(self, sample):
        sample_observation_initial_context, sample_action_T, sample_next_observation_T, sample_reward_T = sample

        image_probs, reward_probs, \
        (total_z_mu_prior, total_z_sigma_prior, total_z_mu_posterior, total_z_sigma_posterior) \
            = self.model.forward_multiple(sample_observation_initial_context, sample_action_T)

        # reward loss
        true_reward = numerical_reward_to_bit_array(
            sample_reward_T, self.reward_prediction_bits, self.use_cuda)
        reward_loss = self.reward_criterion(reward_probs, true_reward)

        # image loss
        reconstruction_loss = self.frame_criterion(image_probs,
                                                   sample_next_observation_T)

        prior_gaussian = Normal(loc=total_z_mu_prior,
                                scale=total_z_sigma_prior)
        posterior_gaussian = Normal(loc=total_z_mu_posterior,
                                    scale=total_z_sigma_posterior)
        kl_div_loss = torch.distributions.kl.kl_divergence(
            prior_gaussian, posterior_gaussian)
        # the frame loss is the negative Evidence Lower Bound (ELBO),
        # where ELBO = log p(X) - KL[q(Z) || p(Z|X)]
        frame_loss = reconstruction_loss + kl_div_loss.mean()

        loss = frame_loss + self.reward_loss_coef * reward_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # The minimal cross entropy between distributions p and q is the entropy of p,
        # so if they are equal the loss equals the entropy of p.
        true_entropy = Bernoulli(probs=sample_next_observation_T).entropy()
        entropy_normalized_loss = reconstruction_loss - true_entropy.mean()
        normalized_frame_loss = entropy_normalized_loss + kl_div_loss.mean()
        return (normalized_frame_loss, reward_loss), (image_probs,
                                                      reward_probs)
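
# Not part of the original project: a standalone sketch (hypothetical tensors) of
# the KL term used above. torch.distributions.kl.kl_divergence on two diagonal
# Gaussians returns the elementwise KL, which the loss above reduces with .mean().
import torch
from torch.distributions import Normal
from torch.distributions.kl import kl_divergence

mu_a, sigma_a = torch.zeros(4), torch.ones(4)
mu_b, sigma_b = torch.full((4,), 0.5), torch.full((4,), 2.0)
kl = kl_divergence(Normal(mu_a, sigma_a), Normal(mu_b, sigma_b))

# closed form for univariate Gaussians, KL(N_a || N_b)
closed_form = (torch.log(sigma_b / sigma_a)
               + (sigma_a ** 2 + (mu_a - mu_b) ** 2) / (2 * sigma_b ** 2)
               - 0.5)
print(torch.allclose(kl, closed_form))  # True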
import torch
from torch.distributions import Bernoulli


def bald_acq(obj_samples: torch.Tensor) -> torch.Tensor:
    """Evaluate Mutual Information acquisition function. 

    With latent function F and X a hypothetical observation at a new point,
    I(F; X) = I(X; F) = H(X) - H(X |F),
    H(X |F ) = E_{f} (H(X |F =f )
    i.e., we take the posterior entropy of the (Bernoulli) observation X given the
    current model posterior and subtract the conditional entropy on F, that being
    the mean entropy over the posterior for F. This is equivalent to the BALD
    acquisition function in Houlsby et al. NeurIPS 2012.

    Args:
        obj_samples (torch.Tensor): Objective samples from the GP, of
            shape num_samples x batch_shape x d_out

    Returns:
        torch.Tensor: Value of acquisition at samples.
    """
    mean_p = obj_samples.mean(dim=0)
    posterior_entropies = Bernoulli(mean_p).entropy().squeeze(-1)
    sample_entropies = Bernoulli(obj_samples).entropy()
    conditional_entropies = sample_entropies.mean(dim=0).squeeze(-1)

    return posterior_entropies - conditional_entropies
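
# Hypothetical usage sketch (not from the original library): obj_samples would
# normally be Monte Carlo draws of the response probability from the model
# posterior at candidate points, shaped num_samples x batch_shape x d_out.
import torch

obj_samples = torch.rand(64, 8, 1).clamp(1e-4, 1 - 1e-4)  # 64 draws, 8 candidates, d_out = 1
acq_values = bald_acq(obj_samples)         # shape (8,): mutual information per candidate
best_candidate = int(acq_values.argmax())  # index of the most informative point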