def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    pm, pv = ut.gaussian_parameters(self.z_pre, dim=1)

    # Draw iw posterior samples per observation and accumulate the per-sample
    # log importance weights and ELBO decomposition terms.
    qm, qv = self.enc.encode(x)
    niwaes = []
    recs = []
    kls = []
    for _ in range(iw):
        z_sample = ut.sample_gaussian(qm, qv).view(-1, qm.shape[1])
        logits = self.dec.decode(z_sample)
        logptheta_x_g_z = ut.log_bernoulli_with_logits(x, logits)
        logptheta_z = ut.log_normal_mixture(z_sample, pm, pv)
        logqphi_z_g_x = ut.log_normal(z_sample, qm, qv)
        # Log importance weight: log p(x|z) + log p(z) - log q(z|x).
        niwae = logptheta_x_g_z + logptheta_z - logqphi_z_g_x
        # ELBO decomposition terms, reusing the log-probs computed above.
        rec = -logptheta_x_g_z
        kl = logqphi_z_g_x - logptheta_z
        niwaes.append(niwae)
        recs.append(rec)
        kls.append(kl)
    niwae = ut.log_mean_exp(torch.stack(niwaes, -1), -1)
    kl = torch.stack(kls, -1)
    rec = torch.stack(recs, -1)
    ################################################################################
    # End of code modification
    ################################################################################
    return -niwae.mean(), kl.mean(), rec.mean()
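# The stack-and-average step above relies on ut.log_mean_exp. A minimal
# sketch of what that helper is assumed to compute, written here with
# torch.logsumexp for numerical stability; the codebase's own utility may
# differ in signature or details.
import math
import torch

def log_mean_exp(x, dim):
    # log(mean(exp(x))) = logsumexp(x, dim) - log(n), computed stably so that
    # large-magnitude log importance weights do not overflow.
    return torch.logsumexp(x, dim) - math.log(x.size(dim))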
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)
    batch_size, dim = m.shape

    # Compute the Monte Carlo KL term against the mixture-of-Gaussians prior.
    km = self.km.repeat(batch_size, 1, 1)
    kv = self.kv.repeat(batch_size, 1, 1)
    kl_vec = ut.log_normal(z, m, v) - ut.log_normal_mixture(z, km, kv)
    kl = torch.mean(kl_vec)

    # Compute reconstruction loss.
    rec_vec = -ut.log_bernoulli_with_logits(x, logits)
    rec = torch.mean(rec_vec)

    # Compute nelbo.
    nelbo = rec + kl
    return nelbo, kl, rec
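# The variant above reads the mixture prior from (self.km, self.kv), while
# others split a packed parameter self.z_pre with ut.gaussian_parameters.
# A hedged sketch of that packed-parameter setup; the initialization and the
# softplus variance transform are assumptions, not confirmed by this code.
import torch
import torch.nn.functional as F

class MixturePriorSketch(torch.nn.Module):
    def __init__(self, k, z_dim):
        super().__init__()
        # One tensor packing k component means and k pre-softplus variances.
        self.z_pre = torch.nn.Parameter(torch.randn(1, 2 * k, z_dim))

    @staticmethod
    def gaussian_parameters(h, dim):
        # Split into means and variances; softplus keeps variances positive.
        m, h_v = torch.split(h, h.size(dim) // 2, dim=dim)
        v = F.softplus(h_v) + 1e-8
        return m, v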
def kl_elem(self, z, qm, qv):
    # Single-sample Monte Carlo estimate of the per-example KL term:
    # log q(z|x) - log p(z), where p(z) is the mixture-of-Gaussians prior.
    prior_m, prior_v = ut.gaussian_parameters(self.z_pre, dim=1)
    log_prob_net = ut.log_normal(z, qm, qv)
    log_prob_prior = ut.log_normal_mixture(z, prior_m, prior_v)
    return log_prob_net - log_prob_prior
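# kl_elem is a single-sample Monte Carlo estimate of KL(q(z|x) || p(z)). A
# standalone sketch of the same idea against a case with a known answer:
# with a standard-normal prior, the sample average should approach the
# analytic KL. All names below are local to this sketch.
import math
import torch

def mc_kl_standard_normal(qm, qv, n_samples=1000):
    # Reparameterized samples, then average log q(z) - log p(z).
    z = qm + qv.sqrt() * torch.randn(n_samples, *qm.shape)
    log_q = (-0.5 * (math.log(2 * math.pi) + qv.log() + (z - qm) ** 2 / qv)).sum(-1)
    log_p = (-0.5 * (math.log(2 * math.pi) + z ** 2)).sum(-1)
    return (log_q - log_p).mean()

def analytic_kl_standard_normal(qm, qv):
    # Closed form for a diagonal Gaussian against N(0, I).
    return 0.5 * (qv + qm ** 2 - 1.0 - qv.log()).sum()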
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    m, v = self.enc.encode(x)

    # Reparameterized sampling via torch.distributions
    # (requires: from torch.distributions import Normal).
    dist = Normal(loc=m, scale=torch.sqrt(v))
    z_sample = dist.rsample(sample_shape=torch.Size([iw]))  # (iw, batch, z_dim)

    log_batch_z_sample_prob = []
    kl_batch_z_sample = []
    for i in range(iw):
        recon_logits = self.dec.decode(z_sample[i])
        log_batch_z_sample_prob.append(
            ut.log_bernoulli_with_logits(x, recon_logits))  # (batch,)
        kl_batch_z_sample.append(
            ut.log_normal(z_sample[i], m, v)
            - ut.log_normal_mixture(z_sample[i], prior[0], prior[1]))
    log_batch_z_sample_prob = torch.stack(log_batch_z_sample_prob, dim=1)  # (batch, iw)
    kl_batch_z_sample = torch.stack(kl_batch_z_sample, dim=1)  # (batch, iw)

    niwae = -ut.log_mean_exp(
        log_batch_z_sample_prob - kl_batch_z_sample, dim=1).mean(dim=0)
    # Reduce the ELBO decomposition to scalars, as the docstring requires.
    rec = -torch.mean(log_batch_z_sample_prob)
    kl = torch.mean(kl_batch_z_sample)
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
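# This variant draws z through torch.distributions.Normal(...).rsample();
# the others call ut.sample_gaussian. A sketch of that helper as the usual
# reparameterization trick, which rsample() implements as well; the real
# utility may differ in details.
import torch

def sample_gaussian(m, v):
    # z = m + sqrt(v) * eps with eps ~ N(0, I); differentiable in m and v.
    return m + torch.sqrt(v) * torch.randn_like(m)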
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    batch_size, dim = m.shape

    # Duplicate everything iw times; the result stacks iw whole copies of
    # the batch along dim 0.
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    km = self.km.repeat(batch_size, 1, 1)
    kv = self.kv.repeat(batch_size, 1, 1)
    km = ut.duplicate(km, iw)
    kv = ut.duplicate(kv, iw)

    kl_vec = ut.log_normal(z, m, v) - ut.log_normal_mixture(z, km, kv)
    kl = torch.mean(kl_vec)

    rec_vec = ut.log_bernoulli_with_logits(x, logits)
    rec = -torch.mean(rec_vec)

    if iw > 1:
        # Per-example log importance weights log p(x|z) - (log q - log p),
        # grouped as iw blocks of the batch before the log-mean-exp.
        log_w = (rec_vec - kl_vec).reshape(iw, batch_size)
        niwae = -torch.mean(ut.log_mean_exp(log_w, 0))
    else:
        niwae = rec + kl
    return niwae, kl, rec
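# The reshape(iw, batch_size) above, like the reshape(iw, -1) calls in the
# other IWAE variants, assumes ut.duplicate stacks rep whole copies of the
# batch along dim 0, so that row k*batch + i corresponds to observation i.
# A sketch of that assumed behavior; the real helper may be implemented
# differently.
import torch

def duplicate(x, rep):
    # (batch, ...) -> (rep * batch, ...): rep contiguous blocks of the batch.
    return x.expand(rep, *x.shape).reshape(-1, *x.shape[1:])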
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    m, v = self.enc.encode(x)  # encoder output
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    z = ut.sample_gaussian(m, v)  # sample a point from the multivariate Gaussian
    logits = self.dec.decode(z)   # pass the sampled z through the decoder

    # Reconstruction: negative Bernoulli log-likelihood of the output.
    rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits))

    # Monte Carlo KL term: log q(z|x) - log p(z).
    log_prob = ut.log_normal(z, m, v)
    log_prob -= ut.log_normal_mixture(z, prior[0], prior[1])
    kl = torch.mean(log_prob)

    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    prior_m, prior_v = prior
    batch = x.shape[0]

    # Draw z from the posterior q(z|x).
    qm, qv = self.enc.encode(x)
    z = ut.sample_gaussian(qm, qv)

    # Monte Carlo KL: log q(z|x) - log p(z) under the mixture prior.
    l_posterior = ut.log_normal(z, qm, qv)
    multi_m = prior_m.expand(batch, *prior_m.shape[1:])
    multi_v = prior_v.expand(batch, *prior_v.shape[1:])
    l_prior = ut.log_normal_mixture(z, multi_m, multi_v)
    kls = l_posterior - l_prior
    kl = torch.mean(kls)

    # Reconstruction: negative Bernoulli log-likelihood from decoder logits.
    logits = self.dec.decode(z)
    recs = ut.log_bernoulli_with_logits(x, logits)
    rec = -torch.mean(recs)

    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    q_m, q_v = self.enc.encode(x)
    q_m_, q_v_ = ut.duplicate(q_m, rep=iw), ut.duplicate(q_v, rep=iw)
    z_given_x = ut.sample_gaussian(q_m_, q_v_)
    decoded_bernoulli_logits = self.dec.decode(z_given_x)

    # Duplicate x to match the iw posterior samples.
    x_dup = ut.duplicate(x, rep=iw)
    log_px_z = ut.log_bernoulli_with_logits(x_dup, decoded_bernoulli_logits)

    log_p_theta = ut.log_normal_mixture(z_given_x, prior[0], prior[1])
    log_q_phi = ut.log_normal(z_given_x, q_m_, q_v_)
    kl_vec = log_q_phi - log_p_theta

    # Log importance weights, grouped as iw blocks of the batch.
    log_w = log_px_z - kl_vec
    niwae = -torch.mean(ut.log_mean_exp(log_w.reshape(iw, -1), dim=0))

    # Reduce the ELBO decomposition to scalars.
    kl = torch.mean(kl_vec)
    rec = -torch.mean(log_px_z)
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
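# Every reconstruction term in this section goes through
# ut.log_bernoulli_with_logits. A sketch of the assumed behavior: per-pixel
# Bernoulli log-likelihood computed from raw decoder logits and summed over
# the data dimension; the codebase's utility may differ in details.
import torch
import torch.nn.functional as F

def log_bernoulli_with_logits(x, logits):
    # BCE-with-logits is the negative Bernoulli log-likelihood, so negate it
    # and sum over pixels to get one log p(x|z) value per example.
    return -F.binary_cross_entropy_with_logits(logits, x, reduction='none').sum(-1)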
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    q_m, q_v = self.enc.encode(x)
    z_given_x = ut.sample_gaussian(q_m, q_v)
    decoded_bernoulli_logits = self.dec.decode(z_given_x)
    rec = -ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)

    # Terms for the Monte Carlo KL divergence.
    log_q_phi = ut.log_normal(z_given_x, q_m, q_v)
    log_p_theta = ut.log_normal_mixture(z_given_x, prior[0], prior[1])
    kl = log_q_phi - log_p_theta

    nelbo = torch.mean(kl + rec)
    rec = torch.mean(rec)
    kl = torch.mean(kl)
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    N_batches, dims = x.size()
    x = ut.duplicate(x, iw)

    q_mu, q_var = self.enc.encode(x)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp)

    log_px_z = ut.log_bernoulli_with_logits(x, logits)
    # Negative KL integrand: log p(z) - log q(z|x).
    neg_kl_vals = -ut.log_normal(z_samp, q_mu, q_var) + ut.log_normal_mixture(z_samp, *prior)

    # duplicate() stacks iw whole copies of the batch, so group the log
    # importance weights as (iw, batch) before the log-mean-exp over samples.
    log_w = log_px_z + neg_kl_vals
    niwae = torch.mean(-ut.log_mean_exp(log_w.reshape(iw, N_batches), 0))
    kl = -torch.mean(neg_kl_vals)
    rec = -torch.mean(log_px_z)
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    pm, pv = ut.gaussian_parameters(self.z_pre, dim=1)

    # Generate samples.
    qm, qv = self.enc.encode(x)
    z_sample = ut.sample_gaussian(qm, qv)
    logits = self.dec.decode(z_sample)

    # Reconstruction: the likelihood of reproducing the input image given z.
    rec = -ut.log_bernoulli_with_logits(x, logits)
    # Monte Carlo KL divergence between the latent distribution and the prior;
    # the closed form ut.kl_normal(qm, qv, pm, pv) does not apply here, since
    # the prior is a mixture rather than a single Gaussian.
    kl = ut.log_normal(z_sample, qm, qv) - ut.log_normal_mixture(z_sample, pm, pv)

    # Average the per-example bound over the batch.
    nelbo = (kl + rec).mean()
    # Reference values: NELBO: 89.25. KL: 10.35. Rec: 78.90
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl.mean(), rec.mean()
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    N_samp, dim = x.size()

    q_mu, q_var = self.enc.encode(x)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp)

    rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits))
    kl = torch.mean(ut.log_normal(z_samp, q_mu, q_var)
                    - ut.log_normal_mixture(z_samp, *prior))
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
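# The KL terms throughout compare ut.log_normal against ut.log_normal_mixture.
# Sketches of both under the usual conventions (diagonal Gaussians, densities
# summed over the last dimension, uniform mixture weights); the actual
# utilities may differ in details.
import math
import torch

def log_normal(x, m, v):
    # Diagonal Gaussian log-density, summed over the event dimension.
    return (-0.5 * (math.log(2 * math.pi) + v.log() + (x - m) ** 2 / v)).sum(-1)

def log_normal_mixture(z, m, v):
    # z: (batch, z_dim); m, v: (1 or batch, k, z_dim). Uniform-weight mixture:
    # log p(z) = log mean_k N(z; m_k, v_k).
    log_probs = log_normal(z.unsqueeze(1), m, v)  # (batch, k)
    return torch.logsumexp(log_probs, dim=1) - math.log(m.size(1))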
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)

    m, v = self.enc.encode(x)
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    kl = ut.log_normal(z, m, v) - ut.log_normal_mixture(z, *prior)
    rec = -ut.log_bernoulli_with_logits(x, logits)
    nelbo = kl + rec

    # The per-sample log importance weight is -nelbo; average inside the log
    # over the iw blocks, then over the batch.
    niwae = -ut.log_mean_exp(-nelbo.reshape(iw, -1), dim=0)
    niwae, kl, rec = niwae.mean(), kl.mean(), rec.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussian prior.
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    prior_m, prior_v = prior
    batch = x.shape[0]

    multi_x = ut.duplicate(x, iw)
    qm, qv = self.enc.encode(x)
    multi_qm = ut.duplicate(qm, iw)
    multi_qv = ut.duplicate(qv, iw)

    # z is (batch*iw, z_dim); the iw samples for a given x are non-contiguous,
    # since duplicate stacks iw whole copies of the batch.
    z = ut.sample_gaussian(multi_qm, multi_qv)

    logits = self.dec.decode(z)
    recs = ut.log_bernoulli_with_logits(multi_x, logits)
    rec = -torch.mean(recs)

    multi_m = prior_m.expand(batch * iw, *prior_m.shape[1:])
    multi_v = prior_v.expand(batch * iw, *prior_v.shape[1:])
    z_priors = ut.log_normal_mixture(z, multi_m, multi_v)
    x_posteriors = recs
    z_posteriors = ut.log_normal(z, multi_qm, multi_qv)

    kls = z_posteriors - z_priors
    kl = torch.mean(kls)

    # Log importance weights, shape (batch*iw,); samples for a given x are
    # non-contiguous, so unflatten as (iw, batch).
    log_ratios = z_priors + x_posteriors - z_posteriors
    unflat_log_ratios = log_ratios.reshape(iw, batch)
    niwaes = ut.log_mean_exp(unflat_log_ratios, 0)
    niwae = -torch.mean(niwaes)
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
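# A closing sanity check, assuming a trained `model` exposing the bound above
# and a batch `x`; both names are assumptions of this sketch. The IWAE bound
# tightens as iw grows, and iw = 1 recovers the ELBO in expectation.
import torch

def iwae_monotonicity_check(model, x, iws=(1, 10, 100)):
    # Returns the negative IWAE bound at each iw; values should trend
    # downward as iw increases, up to Monte Carlo noise.
    with torch.no_grad():
        return [model.negative_iwae_bound(x, iw)[0].item() for iw in iws]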