def negative_elbo_bound(self, x, y):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations
        y: tensor: Labels

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_xy_x, kl_xy_y: tensor: (): ELBO KL divergences to the priors
        rec: tensor: (): ELBO Reconstruction term
        m_xy, v_xy: tensor: Posterior parameters of q(z | x, y)
    """
    if self.CNN:
        m_xy, v_xy = self.enc_xy.encode_xy(x, y)
        m_x, v_x = self.enc_x.encode_x(x)
        m_y, v_y = self.enc_y.encode_y(y)
    else:
        m_xy, v_xy = self.enc_xy.encode(x, y)
        m_x, v_x = self.enc_x.encode(x)
        m_y, v_y = self.enc_y.encode(y)

    # KL divergences for the latent variable z
    kl_xy_x = ut.kl_normal(m_xy, v_xy, m_x, v_x)
    kl_xy_y = ut.kl_normal(m_xy, v_xy, m_y, v_y)

    # Reconstruction error
    z = ut.sample_gaussian(m_xy, v_xy)
    x_logits = self.dec.decode(z)
    if self.CNN:
        x = torch.reshape(x, (x.shape[0], -1))
    rec = -ut.log_bernoulli_with_logits(x, x_logits)

    kl_xy_x = kl_xy_x.mean()
    kl_xy_y = kl_xy_y.mean()
    rec = rec.mean()

    # Weighted negative ELBO: each KL term and the reconstruction term
    # is scaled by its own weight.
    nelbo = (kl_xy_x * self.kl_xy_x_weight
             + kl_xy_y * self.kl_xy_y_weight
             + rec * self.rec_weight)

    return nelbo, kl_xy_x, kl_xy_y, rec, m_xy, v_xy
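# The `ut` helpers are used throughout this file but not shown. Below is a
# minimal sketch of `ut.kl_normal`, assuming the conventional signature
# kl_normal(qm, qv, pm, pv) with diagonal covariances; the real utility
# module may differ in details.

import torch

def kl_normal(qm, qv, pm, pv):
    """KL( N(qm, diag(qv)) || N(pm, diag(pv)) ), summed over the last dim.

    Returns a tensor of shape (batch,), matching how the callers above
    reduce it with .mean().
    """
    element_wise = 0.5 * (torch.log(pv) - torch.log(qv)
                          + qv / pv + (qm - pm).pow(2) / pv - 1)
    return element_wise.sum(-1)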
def negative_elbo_bound(self, x, y):
    # Compute the negative Evidence Lower Bound of ln p(x | y) and its
    # KL and Rec decomposition; nelbo = kl + rec, all outputs scalar.
    q_mu, q_var = self.enc.encode(x, y)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    mu = self.dec.decode(z_samp, y)

    # Gaussian likelihood with fixed observation variance 0.1
    rec = -torch.mean(ut.log_normal(x, mu, 0.1 * torch.ones_like(mu)))
    kl_z = torch.mean(ut.kl_normal(q_mu, q_var,
                                   torch.zeros_like(q_mu),
                                   torch.ones_like(q_var)))
    nelbo = kl_z + rec
    return nelbo, kl_z, rec
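# Several of the conditional models here score x under a diagonal Gaussian
# via `ut.log_normal(x, m, v)`. A minimal sketch under the same assumptions
# (log-density summed over the last dimension; assumed signature):

import numpy as np
import torch

def log_normal(x, m, v):
    """log N(x | m, diag(v)), summed over the last dim -> shape (batch,)."""
    log_prob = -0.5 * (torch.log(2 * np.pi * v) + (x - m).pow(2) / v)
    return log_prob.sum(-1)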
def negative_elbo_bound(self, x, y):
    # Compute the negative Evidence Lower Bound of ln p(x | y) and its
    # KL and Rec decomposition; nelbo = kl + rec, all outputs scalar.
    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)
    x_m = self.dec.decode(z, y)

    # Gaussian likelihood with fixed observation variance self.x_v
    rec = -ut.log_normal(x, x_m, self.x_v.expand(x_m.size())).mean()
    kl_z = ut.kl_normal(m, v,
                        self.z_prior_m.expand(m.size()),
                        self.z_prior_v.expand(v.size())).mean()
    nelbo = kl_z + rec
    return nelbo, kl_z, rec
def negative_elbo_bound(self, x, y):
    # Compute the negative Evidence Lower Bound of ln p(x | y) and its
    # KL and Rec decomposition; nelbo = kl + rec, all outputs scalar.

    # Sample z from q(z | x, y)
    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)

    # Generate x given z, y: the decoder outputs a Gaussian mean,
    # with the observation variance fixed at 0.1
    x_mu = self.dec.decode(z, y)

    # KL of q(z | x, y) to the prior, and the reconstruction loss
    kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1])
    rec_loss = -ut.log_normal(x, x_mu, 0.1 * torch.ones_like(x_mu))

    kl_z, rec_loss = kl_z.mean(), rec_loss.mean()
    nelbo = rec_loss + kl_z
    return nelbo, kl_z, rec_loss
def negative_elbo_bound(self, x, y):
    # Compute the negative Evidence Lower Bound of ln p(x | y) and its
    # KL and Rec decomposition; nelbo = kl + rec, all outputs scalar.
    qm, qv = self.enc.encode(x, y)
    z = ut.sample_gaussian(qm, qv)

    # Decoder mean: (batch, dim_x); the observation std is fixed at 1/10
    recon_m = self.dec.decode(z, y)
    dist = Normal(loc=recon_m, scale=0.1 * torch.ones_like(x))
    log_prob = dist.log_prob(x).sum(dim=1)

    rec = -torch.mean(log_prob, dim=0)
    kl_z = torch.mean(ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v),
                      dim=0)
    nelbo = rec + kl_z
    return nelbo, kl_z, rec
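# The variant above uses torch.distributions.Normal directly; with
# scale = sqrt(v), its summed log_prob matches the `log_normal` sketch
# earlier. A quick, hypothetical consistency check:

import torch
from torch.distributions import Normal

m = torch.randn(4, 8)
v = torch.rand(4, 8) + 0.1                      # positive variances
x = torch.randn(4, 8)

via_dist = Normal(loc=m, scale=v.sqrt()).log_prob(x).sum(-1)
via_util = log_normal(x, m, v)                  # sketch defined earlier
assert torch.allclose(via_dist, via_util, atol=1e-5)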
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    z = ut.sample_gaussian(m, v)
    recon_logits = self.dec.decode(z)

    rec = -torch.mean(ut.log_bernoulli_with_logits(x, recon_logits), dim=0)
    kl = torch.mean(ut.kl_normal(m, v, torch.zeros_like(m),
                                 torch.ones_like(v)), dim=0)
    nelbo = rec + kl
    return nelbo, kl, rec
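# The unconditional VAE above relies on two more `ut` helpers. Minimal
# sketches, assuming the usual reparameterization trick and a factorized
# Bernoulli likelihood (the real utilities may differ slightly):

import torch
import torch.nn.functional as F

def sample_gaussian(m, v):
    """Reparameterized sample z = m + sqrt(v) * eps, with eps ~ N(0, I)."""
    eps = torch.randn_like(m)
    return m + torch.sqrt(v) * eps

def log_bernoulli_with_logits(x, logits):
    """log Bern(x | sigmoid(logits)), summed over the last dim -> (batch,)."""
    return -F.binary_cross_entropy_with_logits(
        logits, x, reduction='none').sum(-1)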
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    # The prior for a VAE is the standard Gaussian N(0, I)
    pm = torch.zeros_like(m)
    pv = torch.ones_like(v)

    # KL divergence from q(z | x) = N(m, v) to the prior
    kl = torch.mean(ut.kl_normal(m, v, pm, pv))

    # Reconstruction term: -E[ log Bern(x | decoder(z)) ]
    rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits))

    # Negative ELBO definition
    nelbo = kl + rec
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_z: tensor: (): ELBO KL divergence to prior on z
        kl_y: tensor: (): ELBO KL divergence to prior on y
        rec: tensor: (): ELBO Reconstruction term
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # KL of q(y | x) to the uniform prior p(y)
    p_y = torch.ones_like(y_prob) / self.y_dim
    kl_y = torch.mean(ut.kl_cat(y_prob, y_logprob, torch.log(p_y)), dim=0)

    batch_size = x.shape[0]

    # Duplicate y based on x's batch size, then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    qm, qv = self.enc.encode(x, y)
    z = ut.sample_gaussian(qm, qv)
    recon_logits = self.dec.decode(z, y)

    # Expected reconstruction under q(y | x)
    p_x_given_yz = ut.log_bernoulli_with_logits(x, recon_logits)
    p_x_given_yz = p_x_given_yz.reshape(self.y_dim, batch_size).transpose(0, 1)  # (batch, y_dim)
    rec = -torch.mean(torch.sum(p_x_given_yz * y_prob, dim=1), dim=0)

    # Expected KL of q(z | x, y) under q(y | x)
    kl_z_over_xy = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v)
    kl_z_over_xy = kl_z_over_xy.reshape(self.y_dim, batch_size).transpose(0, 1)
    kl_z = torch.mean(torch.sum(kl_z_over_xy * y_prob, dim=1), dim=0)

    nelbo = rec + kl_y + kl_z
    return nelbo, kl_z, kl_y, rec
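# The SSVAE bound above also leans on `ut.duplicate` and `ut.kl_cat`.
# Sketches under the assumptions the callers make: duplicate tiles the
# whole batch block-wise (so reshape(y_dim, batch) recovers the label
# blocks), and kl_cat takes probabilities, log-probabilities, and a
# log prior. Both signatures are assumptions.

import torch

def duplicate(x, rep):
    """Tile x along the batch dim: (batch, dim) -> (rep * batch, dim),
    with the whole batch repeated block-wise."""
    return x.expand(rep, *x.shape).reshape(-1, *x.shape[1:])

def kl_cat(q, log_q, log_p):
    """KL( Cat(q) || Cat(p) ) = sum_y q(y) (log q(y) - log p(y)), per row."""
    return (q * (log_q - log_p)).sum(-1)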
def negative_elbo_bound(self, x, beta):
    z_given_x, qmu0, qvar0, qmu1, qvar1 = self.Encoder(x)
    decoded_bernoulli_logits, pmu0, pvar0 = self.Decoder(z_given_x)

    rec = ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)
    rec = -torch.mean(rec)

    # Top-level prior is the standard Gaussian
    pm, pv = torch.zeros_like(qmu1), torch.ones_like(qvar1)
    kl1 = ut.kl_normal(qmu1, qvar1, pm, pv)
    kl2 = ut.kl_normal(qmu0, qvar0, pmu0, pvar0)
    kl = beta * torch.mean(kl1 + kl2)

    nelbo = rec + kl
    return nelbo, rec, kl
def loss_encoder(self, x):
    m, v = self.enc.encode(x)
    kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v).mean()
    loss = kl
    summaries = dict((('gen/kl_z', kl), ))
    return loss, summaries
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_z: tensor: (): ELBO KL divergence to prior on z
        kl_y: tensor: (): ELBO KL divergence to prior on y
        rec: tensor: (): ELBO Reconstruction term
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)

    # Duplicate y based on x's batch size, then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    # Sample z from q(z | x, y)
    qm, qv = self.enc.encode(x, y)
    z = ut.sample_gaussian(qm, qv)

    # Compute the KL terms and the per-example reconstruction loss
    x_logits = self.dec.decode(z, y)
    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim))
    kl_z = ut.kl_normal(qm, qv, self.z_prior[0], self.z_prior[1])
    rec_loss = -ut.log_bernoulli_with_logits(x, x_logits)  # (y_dim * batch,)

    # Expected reconstruction and KL under the distribution q(y | x)
    rec_loss_y = (y_prob.t() * rec_loss.reshape(self.y_dim, -1)).sum(0)
    kl_z_y = (y_prob.t() * kl_z.reshape(self.y_dim, -1)).sum(0)

    # Reduce to means
    kl_y, kl_z, rec = kl_y.mean(), kl_z_y.mean(), rec_loss_y.mean()
    nelbo = rec + kl_z + kl_y
    return nelbo, kl_z, kl_y, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_z: tensor: (): ELBO KL divergence to prior on z
        kl_y: tensor: (): ELBO KL divergence to prior on y
        rec: tensor: (): ELBO Reconstruction term
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size, then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    # Generate samples.
    qm, qv = self.enc.encode(x, y)
    z_sample = ut.sample_gaussian(qm, qv)
    xprime = self.dec.decode(z_sample, y)

    # Compute the loss. kl_cat expects the log of the uniform prior.
    y_logprior = torch.log(torch.ones_like(y_logprob) / self.y_dim)
    kl_y = ut.kl_cat(y_prob, y_logprob, y_logprior)

    # Data was duplicated in a way that makes the batch dimension second.
    kl_z = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v).view(self.y_dim, -1)
    rec = -ut.log_bernoulli_with_logits(x, xprime).view(self.y_dim, -1)

    # Transpose q(y | x) to match the (y_dim, batch) layout, weight, and reduce.
    kl_z = (y_prob.t() * kl_z).sum(0)
    rec = (y_prob.t() * rec).sum(0)

    nelbo = (kl_y + kl_z + rec).mean()
    kl_z, kl_y, rec = kl_z.mean(), kl_y.mean(), rec.mean()
    # Test set classification accuracy: 0.8104000091552734
    return nelbo, kl_z, kl_y, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    num_samples = x.size(0)

    # Calculate the KL divergence term.
    # First, find the variational posterior mean and variance.
    qm, qv = self.enc.encode(x)

    # Next, note that the marginal for z is always the standard normal.
    pm = torch.zeros_like(qm)
    pv = torch.ones_like(qv)

    # Now compute the KL divergence; divide by num_samples for the average.
    kl = torch.sum(ut.kl_normal(qm, qv, pm, pv)) / num_samples

    # Approximate the reconstruction term.
    # First, sample from the variational posterior.
    z_sample = ut.sample_gaussian(qm, qv)

    # Next, pass the sample through the decoder to get the parameters
    # for the pixel Bernoullis.
    bernoulli_logits = self.dec.decode(z_sample)

    # Now create the approximation.
    log_prob_per_sample = ut.log_bernoulli_with_logits(x, bernoulli_logits)
    rec = -torch.sum(log_prob_per_sample) / num_samples

    # nelbo is just kl + rec
    nelbo = kl + rec
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    batch = x.size(0)
    m, v = self.enc.encode(x)

    # Duplicate each tensor iw times along the batch dimension
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    # ELBO-style KL and Rec terms, per duplicated example
    pm = torch.zeros_like(m)
    pv = torch.ones_like(v)
    kl_per_sample = ut.kl_normal(m, v, pm, pv)
    log_prob_per_sample = ut.log_bernoulli_with_logits(x, logits)

    # IWAE bound: log-mean-exp over the iw samples of each example,
    # using the analytic KL in place of the pointwise log-density ratio.
    log_w = (log_prob_per_sample - kl_per_sample).reshape(iw, batch)
    niwae = -torch.mean(ut.log_mean_exp(log_w, dim=0))

    kl = kl_per_sample.mean()
    rec = -log_prob_per_sample.mean()
    return niwae, kl, rec
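# The IWAE bounds here and below average importance weights in log space via
# `ut.log_mean_exp`. A minimal, numerically stable sketch (assumed signature):

import numpy as np
import torch

def log_mean_exp(x, dim):
    """log( mean( exp(x) ) ) along `dim`, computed stably via logsumexp."""
    return torch.logsumexp(x, dim) - np.log(x.size(dim))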
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    dist = Normal(loc=m, scale=torch.sqrt(v))
    z_sample = dist.rsample(sample_shape=torch.Size([iw]))

    log_batch_z_sample = []
    kl_batch_z_sample = []
    for i in range(iw):
        recon_logits = self.dec.decode(z_sample[i])
        log_batch_z_sample.append(
            ut.log_bernoulli_with_logits(x, recon_logits))
        kl_batch_z_sample.append(
            ut.kl_normal(m, v, torch.zeros_like(m), torch.ones_like(v)))
    log_batch_z_sample = torch.stack(log_batch_z_sample, dim=1)  # (batch, iw)
    kl_batch_z_sample = torch.stack(kl_batch_z_sample, dim=1)    # (batch, iw)

    niwae = -ut.log_mean_exp(log_batch_z_sample - kl_batch_z_sample,
                             dim=1).mean(dim=0)
    rec = -log_batch_z_sample.mean()
    kl = kl_batch_z_sample.mean()
    return niwae, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    n_batches, dims = x.size()
    x = ut.duplicate(x, iw)

    q_mu, q_var = self.enc.encode(x)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp)

    probs = ut.log_bernoulli_with_logits(x, logits)
    log_vals = -ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu),
                             torch.ones_like(q_var))
    # Exact alternative to the analytic KL:
    # log_vals = ut.log_normal(z_samp, torch.zeros_like(q_mu), torch.ones_like(q_var)) \
    #            - ut.log_normal(z_samp, q_mu, q_var)
    probs = probs + log_vals

    # ut.duplicate tiles the whole batch block-wise, so the iw copies of a
    # given example are n_batches apart; reshape to (iw, n_batches) before
    # taking the log-mean-exp over the sample dimension.
    niwae = torch.mean(-ut.log_mean_exp(probs.reshape(iw, n_batches), 0))

    # KL and Rec are not tracked separately here; return placeholders.
    kl = torch.tensor(0)
    rec = torch.tensor(0)
    return niwae, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # Sample z from the encoder distribution
    q_m, q_v = self.enc.encode(x)
    z_given_x = ut.sample_gaussian(q_m, q_v)

    decoded_bernoulli_logits = self.dec.decode(z_given_x)
    rec = -torch.mean(ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits))

    p_m, p_v = torch.zeros_like(q_m), torch.ones_like(q_v)
    kl = torch.mean(ut.kl_normal(q_m, q_v, p_m, p_v))

    nelbo = rec + kl
    return nelbo, kl, rec
def negative_elbo_bound_gumbel(self, x, tau):
    """
    Gumbel-softmax version. Not slated for release.
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = F.softmax(y_logits, dim=1)
    y = ut.gumbel_softmax(y_logits, tau)

    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z, y)

    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim)).mean()
    kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1]).mean()
    rec = -ut.log_bernoulli_with_logits(x, x_logits).mean()

    nelbo = kl_y + kl_z + rec
    return nelbo, kl_z, kl_y, rec
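# `ut.gumbel_softmax` above draws a differentiable relaxed one-hot sample.
# A standard Gumbel-softmax (Concrete) sketch, assuming that interface:

import torch
import torch.nn.functional as F

def gumbel_softmax(logits, tau, eps=1e-20):
    """Sample softmax((logits + g) / tau) with g ~ i.i.d. Gumbel(0, 1)."""
    u = torch.rand_like(logits)
    g = -torch.log(-torch.log(u + eps) + eps)
    return F.softmax((logits + g) / tau, dim=-1)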
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # 1. Get the latent distribution and draw one sample.
    m, v = self.enc.encode(x)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z)

    # 2. KL divergence between q(z | x) and p(z)
    #    (z has a standard Gaussian prior).
    pz_m, pz_v = self.z_prior[0], self.z_prior[1]
    kl_loss = ut.kl_normal(m, v, pz_m, pz_v)

    # 3. Reconstruction log-likelihood; encourages x_hat to match x.
    r_loss = ut.log_bernoulli_with_logits(x, x_logits)

    nelbo = (kl_loss - r_loss).mean()
    kl = kl_loss.mean()
    rec = -r_loss.mean()
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # The first dimension of m and v is the batch;
    # each input generates its own normal distribution.
    m, v = self.enc.encode(x)
    kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    # p(x | z): the logits come from the latent variable z
    rec = -ut.log_bernoulli_with_logits(x, logits)

    kl = kl.mean()
    rec = rec.mean()
    nelbo = kl + rec
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # Compute the encoder output
    m, v = self.enc.encode(x)
    kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v)

    # Sample a point from the multivariate Gaussian
    # and pass the sampled z through the decoder
    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    # Calculate the log-probability of the output
    rec = -ut.log_bernoulli_with_logits(x, logits)

    nelbo = torch.mean(kl + rec)
    kl = torch.mean(kl)
    rec = torch.mean(rec)
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    phi = self.enc.encode(x)
    z_hat = ut.sample_gaussian(*phi)
    kl = ut.kl_normal(*phi, *self.z_prior).mean()

    # Mixture-of-Gaussians prior alternative (GMVAE):
    # prior = ut.gaussian_parameters(self.z_pre, dim=1)
    # q = self.enc.encode(x)
    # z_hat = ut.sample_gaussian(*q)
    # kl = (ut.log_normal(z_hat, *q) - ut.log_normal_mixture(z_hat, *prior)).mean()

    rec = -ut.log_bernoulli_with_logits(x, self.dec.decode(z_hat)).mean()
    nelbo = kl + rec
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    qm, qv = self.enc.encode(x)
    pm = self.z_prior[0].expand(qm.shape)
    pv = self.z_prior[1].expand(qv.shape)

    kls = ut.kl_normal(qm, qv, pm, pv)
    kl = torch.mean(kls)

    z = ut.sample_gaussian(qm, qv)
    logits = self.dec.decode(z)
    recs = ut.log_bernoulli_with_logits(x, logits)
    rec = -1.0 * torch.mean(recs)

    nelbo = kl + rec
    return nelbo, kl, rec
def kl_elem(self, z, qm, qv):
    # Per-example KL of q(z | x) = N(qm, qv) to the prior on z
    kl_elem = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v)
    return kl_elem
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    dist = Normal(loc=m, scale=torch.sqrt(v))
    z_iw = dist.rsample(sample_shape=torch.Size([iw]))

    log_z_batch, kl_z_batch = [], []
    # For each z sample
    for i in range(iw):
        recon_logits = self.dec.decode(z_iw[i])
        log_z_batch.append(ut.log_bernoulli_with_logits(x, recon_logits))
        kl_z_batch.append(ut.kl_normal(m, v, torch.zeros_like(m),
                                       torch.ones_like(v)))

    # Aggregate the results together: (batch, iw)
    log_z = torch.stack(log_z_batch, dim=1)
    kl_z = torch.stack(kl_z_batch, dim=1)

    # An exact alternative would replace the analytic KL with the pointwise
    # ratio log q(z | x) - log p(z) for each sample.
    niwae = -ut.log_mean_exp(log_z - kl_z, dim=1).mean(dim=0)
    rec_loss = -log_z.mean()
    kl = kl_z.mean()
    return niwae, kl, rec_loss
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    batch = x.shape[0]
    multi_x = ut.duplicate(x, iw)

    qm, qv = self.enc.encode(x)
    multi_qm = ut.duplicate(qm, iw)
    multi_qv = ut.duplicate(qv, iw)

    # z will be (batch*iw, z_dim), with the samples
    # for a given x non-contiguous!
    z = ut.sample_gaussian(multi_qm, multi_qv)
    probs = self.dec.decode(z)
    recs = ut.log_bernoulli_with_logits(multi_x, probs)
    rec = -1.0 * torch.mean(recs)

    multi_pm = self.z_prior[0].expand(multi_qm.shape)
    multi_pv = self.z_prior[1].expand(multi_qv.shape)
    z_priors = ut.log_normal(z, multi_pm, multi_pv)
    x_posteriors = recs
    z_posteriors = ut.log_normal(z, multi_qm, multi_qv)

    # Log importance weights: (batch*iw,), with a given x's samples
    # non-contiguous, so reshape to (iw, batch) before averaging.
    log_ratios = z_priors + x_posteriors - z_posteriors
    unflat_log_ratios = log_ratios.reshape(iw, batch)
    niwaes = ut.log_mean_exp(unflat_log_ratios, 0)
    niwae = -1.0 * torch.mean(niwaes)

    pm = self.z_prior[0].expand(qm.shape)
    pv = self.z_prior[1].expand(qv.shape)
    kls = ut.kl_normal(qm, qv, pm, pv)
    kl = torch.mean(kls)
    return niwae, kl, rec
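# Hypothetical smoke test for the exact-weight bound above, assuming a
# `model` exposing both bounds on binarized inputs: with iw == 1 the IWAE
# bound equals the ELBO in expectation, and it tightens as iw grows.
#
#   x = torch.rand(32, 784).bernoulli()
#   niwae_1, _, _ = model.negative_iwae_bound(x, iw=1)
#   nelbo, _, _ = model.negative_elbo_bound(x)
#   niwae_10, _, _ = model.negative_iwae_bound(x, iw=10)  # <= nelbo on average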
def negative_elbo_bound(self, x, label, mask=None, sample=False, adj=None,
                        lambdav=0.001):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations
        label: tensor: (batch, z1_dim): Concept labels
        mask: int: index of the concept to intervene on, if any

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
        reconstruction, z_given_dag: tensors for downstream use
    """
    assert label.size()[1] == self.z1_dim

    q_m, q_v = self.enc.encode(x.to(device))
    q_m = q_m.reshape([q_m.size()[0], self.z1_dim, self.z2_dim])
    q_v = torch.ones(q_m.size()[0], self.z1_dim, self.z2_dim).to(device)

    decode_m, decode_v = self.dag.calculate_dag(
        q_m.to(device),
        torch.ones(q_m.size()[0], self.z1_dim, self.z2_dim).to(device))
    decode_m = decode_m.reshape([q_m.size()[0], self.z1_dim, self.z2_dim])

    if sample == False:
        if mask is not None and mask in [0, 1, 3]:
            z_mask = torch.ones(q_m.size()[0], self.z1_dim,
                                self.z2_dim).to(device) * adj
            decode_m[:, mask, :] = z_mask[:, mask, :]
            decode_v[:, mask, :] = z_mask[:, mask, :]
        m_zm = self.dag.mask_z(decode_m.to(device)).reshape(
            [q_m.size()[0], self.z1_dim, self.z2_dim])
        m_zv = decode_v.reshape([q_m.size()[0], self.z1_dim, self.z2_dim])
        m_u = self.dag.mask_u(label.to(device))

        # Mask
        f_z = self.mask_z.mix(m_zm).reshape(
            [q_m.size()[0], self.z1_dim, self.z2_dim]).to(device)
        e_tilde = self.attn.attention(
            decode_m.reshape([q_m.size()[0], self.z1_dim,
                              self.z2_dim]).to(device),
            q_m.reshape([q_m.size()[0], self.z1_dim,
                         self.z2_dim]).to(device))[0]
        f_z1 = f_z + e_tilde
        if mask is not None and mask == 2:
            z_mask = torch.ones(q_m.size()[0], self.z1_dim,
                                self.z2_dim).to(device) * adj
            f_z1[:, mask, :] = z_mask[:, mask, :]
            m_zv[:, mask, :] = z_mask[:, mask, :]
        g_u = self.mask_u.mix(m_u).to(device)
        m_zv = torch.ones([q_m.size()[0], self.z1_dim,
                           self.z2_dim]).to(device)

    z_given_dag = ut.conditional_sample_gaussian(f_z1, q_v * lambdav)
    decoded_bernoulli_logits, x1, x2, x3, x4 = self.dec.decode_sep(
        z_given_dag.reshape([z_given_dag.size()[0], self.z_dim]),
        label.to(device))

    rec = ut.log_bernoulli_with_logits(
        x, decoded_bernoulli_logits.reshape(x.size()))
    rec = -torch.mean(rec)

    p_m, p_v = torch.zeros(q_m.size()), torch.ones(q_m.size())
    cp_m, cp_v = ut.condition_prior(self.scale, label, self.z2_dim)
    cp_v = torch.ones([q_m.size()[0], self.z1_dim, self.z2_dim]).to(device)
    cp_z = ut.conditional_sample_gaussian(cp_m.to(device), cp_v.to(device))

    kl = 0.3 * ut.kl_normal(
        q_m.view(-1, self.z_dim).to(device),
        q_v.view(-1, self.z_dim).to(device),
        p_m.view(-1, self.z_dim).to(device),
        p_v.view(-1, self.z_dim).to(device))
    for i in range(self.z1_dim):
        kl = kl + ut.kl_normal(
            decode_m[:, i, :].to(device), cp_v[:, i, :].to(device),
            cp_m[:, i, :].to(device), cp_v[:, i, :].to(device))
    kl = torch.mean(kl)

    mask_kl = torch.zeros(1).to(device)
    for i in range(self.z1_dim):
        mask_kl = mask_kl + ut.kl_normal(
            f_z1[:, i, :].to(device), cp_v[:, i, :].to(device),
            cp_m[:, i, :].to(device), cp_v[:, i, :].to(device))
    u_loss = torch.nn.MSELoss()
    mask_l = torch.mean(mask_kl) + u_loss(g_u, label.float().to(device))

    nelbo = rec + kl + mask_l
    return (nelbo, kl, rec,
            decoded_bernoulli_logits.reshape(x.size()), z_given_dag)
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_z: tensor: (): ELBO KL divergence to prior on z
        kl_y: tensor: (): ELBO KL divergence to prior on y
        rec: tensor: (): ELBO Reconstruction term
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size, then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])   # (batch * y_dim, y_dim), batch-blocked
    x = ut.duplicate(x, self.y_dim)    # (batch * y_dim, dim), batch-blocked

    # KL of q(y | x) to the uniform prior, shape (batch,)
    y_prior = torch.tensor([1.0 / self.y_dim]).expand_as(y_prob).to(device)
    y_logprior = torch.log(y_prior)
    kl_ys = ut.kl_cat(y_prob, y_logprob, y_logprior)
    kl_y = torch.mean(kl_ys)

    # Encode the duplicated batch; the copies vary over x quickly and y
    # slowly, which is equivalent to y being the 0th dimension after the
    # reshape below.
    zqm, zqv = self.enc.encode(x, y)
    zpm = self.z_prior_m.expand_as(zqm)
    zpv = self.z_prior_v.expand_as(zqv)

    # (batch * y_dim,) -> (batch, y_dim), weighted by q(y | x)
    kl_zs_flat = ut.kl_normal(zqm, zqv, zpm, zpv)
    kl_zs = kl_zs_flat.reshape(self.y_dim, -1).t()
    batch_kl_zs = (kl_zs * y_prob).sum(1)
    kl_z = batch_kl_zs.mean()

    z = ut.sample_gaussian(zqm, zqv)
    probs = self.dec.decode(z, y)

    # Same layout for the reconstruction term
    recs_flat = -1.0 * ut.log_bernoulli_with_logits(x, probs)
    recs = recs_flat.reshape(self.y_dim, -1).t()
    batch_recs = (recs * y_prob).sum(1)
    rec = batch_recs.mean()

    nelbos = kl_ys + batch_kl_zs + batch_recs
    nelbo = torch.mean(nelbos)
    return nelbo, kl_z, kl_y, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_z: tensor: (): ELBO KL divergence to prior on z
        kl_y: tensor: (): ELBO KL divergence to prior on y
        rec: tensor: (): ELBO Reconstruction term
    """
    n_batches, dims = x.size()

    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size, then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    q_mu, q_var = self.enc.encode(x, y)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp, y)

    # The duplicated batch is blocked by label, so reshape to
    # (y_dim, batch) and transpose before weighting by q(y | x).
    rec_ls = -ut.log_bernoulli_with_logits(x, logits)
    rec = torch.mean(
        torch.sum(y_prob * rec_ls.reshape(self.y_dim, n_batches).t(), dim=1))

    kl_y = torch.mean(
        ut.kl_cat(y_prob, y_logprob,
                  torch.log(torch.ones_like(y_prob) / self.y_dim)))

    kl_z_ls = ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu),
                           torch.ones_like(q_var))
    kl_z = torch.mean(
        torch.sum(y_prob * kl_z_ls.reshape(self.y_dim, n_batches).t(), dim=1))

    nelbo = kl_z + kl_y + rec
    return nelbo, kl_z, kl_y, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL, and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    batch_size = x.size(0)

    qm, qv = self.enc.encode(x)
    z_samp = ut.sample_gaussian(qm, qv)
    xhat = self.dec.decode(z_samp)

    # Standard-normal prior; must match the shape of (qm, qv) since
    # kl_normal compares the two distributions element-wise, KL(q || p).
    pm = torch.zeros_like(qm)
    pv = torch.ones_like(qv)

    kl = ut.kl_normal(qm, qv, pm, pv)
    rec = -ut.log_bernoulli_with_logits(x, xhat)

    kl = torch.sum(kl) / batch_size
    rec = torch.sum(rec) / batch_size
    nelbo = kl + rec
    return nelbo, kl, rec