def wse_ff(model, inputs, targets):
    pred = model.forward(inputs).detach()
    if not model.constant_var:
        mean, var = ut.gaussian_parameters_ff(pred, dim=0)
    else:
        mean = pred
        var = model.pred_var
    sample_trajs = ut.sample_gaussian(mean, var)
    return ((targets - sample_trajs) ** 2).sum(-1).sum(0)
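# The snippets in this section all lean on a small utilities module `ut`.
# For reference, a minimal sketch of the reparameterized sampler they call,
# assuming `v` holds per-dimension variances (not log-variances):
import torch

def sample_gaussian(m, v):
    # Reparameterization trick: z = m + sqrt(v) * eps with eps ~ N(0, I),
    # so gradients flow through m and v while z remains a valid sample.
    eps = torch.randn_like(m)
    return m + torch.sqrt(v) * eps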
def negative_elbo_bound(self, x, y):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations
        y: tensor: (batch, y_dim): Labels

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl_xy_x: tensor: (): KL(q(z|x,y) || q(z|x)) term
        kl_xy_y: tensor: (): KL(q(z|x,y) || q(z|y)) term
        rec: tensor: (): ELBO Reconstruction term
        m_xy: tensor: (batch, z_dim): Posterior mean of q(z|x,y)
        v_xy: tensor: (batch, z_dim): Posterior variance of q(z|x,y)
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    if self.CNN:
        m_xy, v_xy = self.enc_xy.encode_xy(x, y)
        m_x, v_x = self.enc_x.encode_x(x)
        m_y, v_y = self.enc_y.encode_y(y)
    else:
        m_xy, v_xy = self.enc_xy.encode(x, y)
        m_x, v_x = self.enc_x.encode(x)
        m_y, v_y = self.enc_y.encode(y)

    # KL divergence terms for the latent variable z
    kl_xy_x = ut.kl_normal(m_xy, v_xy, m_x, v_x)
    kl_xy_y = ut.kl_normal(m_xy, v_xy, m_y, v_y)

    # reconstruction error
    z = ut.sample_gaussian(m_xy, v_xy)
    x_logits = self.dec.decode(z)
    if self.CNN:
        x = torch.reshape(x, (x.shape[0], -1))
    rec = -ut.log_bernoulli_with_logits(x, x_logits)

    kl_xy_x = kl_xy_x.mean()
    kl_xy_y = kl_xy_y.mean()
    rec = rec.mean()
    nelbo = kl_xy_x * self.kl_xy_x_weight + kl_xy_y * self.kl_xy_y_weight + rec * self.rec_weight
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_xy_x, kl_xy_y, rec, m_xy, v_xy
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    # y.shape -> (self.y_dim * x.size(0),) -> (0,0,0,0,...,1,1,1,1,...)
    y = x.new(np.eye(self.y_dim)[y])  # y.shape -> (self.y_dim * x.size(0), 10)
    x = ut.duplicate(x, self.y_dim)

    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z, y)

    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim))
    kl_z = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v)
    rec = -ut.log_bernoulli_with_logits(x, x_logits)

    # Weight the per-label terms by q(y | x) and sum over labels.
    rec = (y_prob.t() * rec.reshape(self.y_dim, -1)).sum(0)
    kl_z = (y_prob.t() * kl_z.reshape(self.y_dim, -1)).sum(0)

    kl_y, kl_z, rec = kl_y.mean(), kl_z.mean(), rec.mean()
    nelbo = rec + kl_z + kl_y
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
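# The tiling above relies on a convention: `ut.duplicate` stacks whole copies
# of the batch, so that `rec.reshape(self.y_dim, -1)` puts one label per row.
# A minimal sketch of that assumed behavior (consistent with every
# `reshape(iw, -1)` / `reshape(self.y_dim, -1)` in this section):
import torch

def duplicate(x, rep):
    # Tile x along a new leading axis and fold it into the batch:
    # (batch, ...) -> (rep * batch, ...), ordered as [x; x; ...; x], so that
    # out.reshape(rep, batch, ...) recovers the copies row by row.
    return x.expand(rep, *x.shape).reshape(-1, *x.shape[1:])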
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    # Generate samples.
    qm, qv = self.enc.encode(x, y)
    z_sample = ut.sample_gaussian(qm, qv)
    xprime = self.dec.decode(z_sample, y)

    # Compute loss. kl_cat expects the prior in log space (cf. the
    # np.log(1.0 / self.y_dim) used in the sibling implementations).
    y_logprior = torch.log(torch.ones_like(y_logprob) / self.y_dim)
    kl_y = ut.kl_cat(y_prob, y_logprob, y_logprior)

    # Data is duplicated in a way that makes the batch dimension second.
    kl_z = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v).view(self.y_dim, -1)
    rec = -ut.log_bernoulli_with_logits(x, xprime).view(self.y_dim, -1)

    # Transpose the probabilities so they match the new batch dimensions.
    nelbo = kl_y + (y_prob.t() * (kl_z + rec)).sum(0)
    nelbo = nelbo.mean()
    # Reduce the individual terms to scalars, as the docstring requires.
    kl_z = (y_prob.t() * kl_z).sum(0).mean()
    rec = (y_prob.t() * rec).sum(0).mean()
    kl_y = kl_y.mean()
    # Test set classification accuracy: 0.8104000091552734
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
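# `ut.kl_cat` is used here (after the log-space fix above) and in the other
# classifier snippets with a log-probability prior. A minimal sketch of the
# assumed semantics, KL(q || p) for categorical distributions:
import torch

def kl_cat(q, log_q, log_p):
    # KL(q || p) = sum_y q(y) * (log q(y) - log p(y)), one value per batch
    # element; log_p may be a scalar (uniform prior) or a (batch, y_dim) tensor.
    return (q * (log_q - log_p)).sum(-1)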
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    X_dupl = ut.duplicate(x, iw)  # input "x" is duplicated "iw" times
    m, v = self.enc.encode(X_dupl)  # compute the encoder output
    z = ut.sample_gaussian(m, v)  # sample a point from the multivariate Gaussian
    logits = self.dec.decode(z)  # pass the sampled "z" through the decoder

    # Calculate log p(x | z), the log-probability of the output given the latent
    ln_P_x_z = ut.log_bernoulli_with_logits(X_dupl, logits)
    # Calculate log p(z)
    ln_P_z = ut.log_normal(z, self.z_prior_m, self.z_prior_v)
    # Calculate log q(z | x), the conditional density of the latent given x
    ln_q_z_x = ut.log_normal(z, m, v)

    exponent = ln_P_x_z + ln_P_z - ln_q_z_x
    exponent = exponent.reshape(iw, -1)
    L_m_x = ut.log_mean_exp(exponent, 0)
    niwae = -torch.mean(L_m_x)

    # Report the ELBO's KL and reconstruction terms as scalars.
    kl = (ln_q_z_x - ln_P_z).mean()
    rec = -ln_P_x_z.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
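# The IWAE bound hinges on `ut.log_mean_exp`. A minimal, numerically stable
# sketch of what it is assumed to compute, via the log-sum-exp trick:
import math
import torch

def log_mean_exp(x, dim):
    # log(mean(exp(x))) along dim, computed as logsumexp(x) - log(n) so that
    # very negative importance log-weights do not underflow to zero.
    return torch.logsumexp(x, dim) - math.log(x.size(dim))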
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)
    batch_size, dim = m.shape

    # Duplicate
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    km = self.km.repeat(batch_size, 1, 1)
    kv = self.kv.repeat(batch_size, 1, 1)
    km = ut.duplicate(km, iw)
    kv = ut.duplicate(kv, iw)

    kl_vec = ut.log_normal(z, m, v) - ut.log_normal_mixture(z, km, kv)
    kl = torch.mean(kl_vec)

    rec_vec = ut.log_bernoulli_with_logits(x, logits)
    rec = torch.neg(torch.mean(rec_vec))

    if iw > 1:
        # Importance log-weights are log p(x|z) + log p(z) - log q(z|x),
        # kept per sample: log-mean-exp over the iw copies of each example,
        # then average over the batch.
        log_w = (rec_vec - kl_vec).reshape(iw, -1)
        niwae = torch.neg(ut.log_mean_exp(log_w, 0).mean())
    else:
        niwae = rec + kl

    return niwae, kl, rec
def Decoder(self, z_given_x):
    # last step down, sharing weights with the stochastic downward pass from the encoder
    _, pmu0, pvar0 = self.MLP3.encode(z_given_x)
    z0 = ut.sample_gaussian(pmu0, pvar0)
    # return Bernoulli logits
    decoded_logits = self.FinalDecoder.decode(z0)
    return decoded_logits, pmu0, pvar0
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    pm, pv = ut.gaussian_parameters(self.z_pre, dim=1)

    # Generate samples.
    qm, qv = self.enc.encode(x)
    niwaes = []
    recs = []
    kls = []
    for i in range(iw):
        z_sample = ut.sample_gaussian(qm, qv).view(-1, qm.shape[1])
        logits = self.dec.decode(z_sample)
        logptheta_x_g_z = ut.log_bernoulli_with_logits(x, logits)
        logptheta_z = ut.log_normal_mixture(z_sample, pm, pv)
        logqphi_z_g_x = ut.log_normal(z_sample, qm, qv)
        niwae = logptheta_x_g_z + logptheta_z - logqphi_z_g_x
        # Per-sample reconstruction and KL terms for reporting.
        rec = -logptheta_x_g_z
        kl = logqphi_z_g_x - logptheta_z
        niwaes.append(niwae)
        recs.append(rec)
        kls.append(kl)
    niwaes = torch.stack(niwaes, -1)
    niwae = ut.log_mean_exp(niwaes, -1)
    kl = torch.stack(kls, -1)
    rec = torch.stack(recs, -1)
    ################################################################################
    # End of code modification
    ################################################################################
    return -niwae.mean(), kl.mean(), rec.mean()
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    numSamples = x.size()[0]

    # Calculate the KL divergence term.
    # First, find the variational posterior mean and variance.
    qm, qv = self.enc.encode(x)
    # Next, note that the prior on z is the standard normal
    # (created on x's device so this also runs on GPU).
    pm = torch.zeros([numSamples, self.z_dim], dtype=torch.float, device=x.device)
    pv = torch.ones([numSamples, self.z_dim], dtype=torch.float, device=x.device)
    # Now compute the KL divergence, dividing by numSamples to get the average.
    kl = torch.sum(ut.kl_normal(qm, qv, pm, pv)) / numSamples

    # Approximate the reconstruction term.
    # First, sample from the variational posterior.
    zSample = ut.sample_gaussian(qm, qv)
    # Next, pass the sample through the decoder to get parameters
    # for the pixel Bernoullis.
    bernoulliParams = self.dec.decode(zSample)
    # Now create the approximation.
    logProbForEachSample = ut.log_bernoulli_with_logits(x, bernoulliParams)
    rec = -1 * torch.sum(logProbForEachSample) / numSamples

    # nelbo is just kl + rec.
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
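# Several snippets use the analytic `ut.kl_normal` instead of a Monte Carlo
# estimate. A minimal sketch of the assumed closed form for diagonal
# Gaussians, KL(N(qm, diag(qv)) || N(pm, diag(pv))):
import torch

def kl_normal(qm, qv, pm, pv):
    # 0.5 * sum(log(pv / qv) + qv / pv + (qm - pm)^2 / pv - 1),
    # summed over the latent dimension, one value per batch element.
    return 0.5 * (torch.log(pv) - torch.log(qv) + qv / pv
                  + (qm - pm).pow(2) / pv - 1).sum(-1)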
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    prior = ut.gaussian_parameters(self.z_pre, dim=1)

    m, v = self.enc.encode(x)  # compute the encoder output
    z = ut.sample_gaussian(m, v)  # sample a point from the multivariate Gaussian
    logits = self.dec.decode(z)  # pass the sampled "z" through the decoder
    rec = -ut.log_bernoulli_with_logits(x, logits)  # log-probability of the output

    # Monte Carlo KL: log q(z|x) - log p(z) under the mixture prior.
    log_prob = ut.log_normal(z, m, v)
    log_prob -= ut.log_normal_mixture(z, prior[0], prior[1])

    kl = torch.mean(log_prob)
    rec = torch.mean(rec)
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
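# `ut.gaussian_parameters` turns the unconstrained prior parameters
# `self.z_pre` into a mean and a positive variance. A plausible minimal
# sketch; the split-and-softplus parameterization is an assumption, though it
# matches the outputs being used directly as variances everywhere else:
import torch
import torch.nn.functional as F

def gaussian_parameters(h, dim=-1):
    # Split h in half along dim: the first half is the mean, the second half
    # is squashed through softplus (plus a small epsilon) to stay positive.
    m, h = torch.split(h, h.size(dim) // 2, dim=dim)
    v = F.softplus(h) + 1e-8
    return m, v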
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    m, v = self.enc.encode(x)  # m, v -> (batch, dim)

    # (batch, dim) -> (batch*iw, dim)
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    # z -> (batch*iw, dim)
    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    kl = ut.log_normal(z, m, v) - ut.log_normal(z, self.z_prior_m, self.z_prior_v)
    rec = -ut.log_bernoulli_with_logits(x, logits)
    nelbo = kl + rec
    niwae = -ut.log_mean_exp(-nelbo.reshape(iw, -1), dim=0)

    niwae, kl, rec = niwae.mean(), kl.mean(), rec.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
def negative_iwae_bound_for(self, x, x_hat, y, c, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        x_hat: tensor: (batch, dim): Observations
        y: tensor: (batch, y_dim): whether observations contain EV
        c: tensor: (batch, c_dim): target mapping specification
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # encode
    qm, qv = self.enc.encode(x, y=y)

    # replicate qm, qv
    q_shape = list(qm.shape)
    qm = qm.unsqueeze(1).expand(q_shape[0], iw, *q_shape[1:])
    qv = qv.unsqueeze(1).expand(q_shape[0], iw, *q_shape[1:])

    # replicate x, y, c
    x_shape = list(x_hat.shape)
    x_hat = x_hat.unsqueeze(1).expand(x_shape[0], iw, *x_shape[1:])
    y_shape = list(y.shape)
    y = y.unsqueeze(1).expand(y_shape[0], iw, *y_shape[1:])
    c_shape = list(c.shape)
    c = c.unsqueeze(1).expand(c_shape[0], iw, *c_shape[1:])

    # sample z(1)...z(iw) (for a Monte Carlo estimate of p(x|z(i)))
    z = ut.sample_gaussian(qm, qv)
    kl_elem = self.kl_elem(z, qm, qv)

    # decode
    mu, var = self.dec.decode(z, y=y, c=c)
    nll, rec_mse, rec_var = ut.nlog_prob_normal(
        mu=mu, y=x_hat, var=var, fixed_var=self.warmup, var_pen=self.var_pen)
    log_prob, rec_mse, rec_var = -nll, rec_mse.mean(), rec_var.mean()
    niwae = -ut.log_mean_exp(log_prob - kl_elem, dim=1).mean(-1)

    # reduce
    rec = -log_prob.mean(1).mean(-1)
    kl = kl_elem.mean(1).mean(-1)
    return niwae, kl, rec, rec_mse, rec_var
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    prior = ut.gaussian_parameters(self.z_pre, dim=1)

    q_m, q_v = self.enc.encode(x)
    z_given_x = ut.sample_gaussian(q_m, q_v)
    decoded_bernoulli_logits = self.dec.decode(z_given_x)
    rec = -ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)

    # Terms for the Monte Carlo KL divergence.
    log_q_phi = ut.log_normal(z_given_x, q_m, q_v)
    log_p_theta = ut.log_normal_mixture(z_given_x, prior[0], prior[1])
    kl = log_q_phi - log_p_theta

    nelbo = torch.mean(kl + rec)
    rec = torch.mean(rec)
    kl = torch.mean(kl)
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
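# The Monte Carlo KL above only needs log-densities at the sampled z.
# Minimal sketches of the two assumed helpers, with m, v broadcast against z
# and the mixture taking equally weighted components of shape (1, k, z_dim):
import math
import torch

def log_normal(z, m, v):
    # Diagonal-Gaussian log-density, summed over the latent dimension.
    return (-0.5 * (math.log(2 * math.pi) + torch.log(v)
                    + (z - m).pow(2) / v)).sum(-1)

def log_normal_mixture(z, m, v):
    # Uniformly weighted mixture: z is (batch, z_dim), m and v broadcast to
    # (batch, k, z_dim); log-mean-exp over the k component densities.
    log_probs = log_normal(z.unsqueeze(1), m, v)  # (batch, k)
    return torch.logsumexp(log_probs, dim=1) - math.log(m.size(1))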
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    prior = ut.gaussian_parameters(self.z_pre, dim=1)

    q_m, q_v = self.enc.encode(x)
    q_m_, q_v_ = ut.duplicate(q_m, rep=iw), ut.duplicate(q_v, rep=iw)
    z_given_x = ut.sample_gaussian(q_m_, q_v_)
    decoded_bernoulli_logits = self.dec.decode(z_given_x)

    # duplicate x
    x_dup = ut.duplicate(x, rep=iw)
    log_px_z = ut.log_bernoulli_with_logits(x_dup, decoded_bernoulli_logits)

    log_p_theta = ut.log_normal_mixture(z_given_x, prior[0], prior[1])
    log_q_phi = ut.log_normal(z_given_x, q_m_, q_v_)
    kl = log_q_phi - log_p_theta

    niwae = log_px_z - kl
    niwae = ut.log_mean_exp(niwae.reshape(iw, -1), dim=0)
    niwae = -torch.mean(niwae)  # yay!

    # Reduce KL and reconstruction to scalars, as the docstring requires.
    kl = kl.mean()
    rec = -log_px_z.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    prior = ut.gaussian_parameters(self.z_pre, dim=1)
    prior_m, prior_v = prior
    batch = x.shape[0]

    qm, qv = self.enc.encode(x)
    # Now draw zs from the posterior qm/qv
    z = ut.sample_gaussian(qm, qv)

    l_posterior = ut.log_normal(z, qm, qv)
    multi_m = prior_m.expand(batch, *prior_m.shape[1:])
    multi_v = prior_v.expand(batch, *prior_v.shape[1:])
    l_prior = ut.log_normal_mixture(z, multi_m, multi_v)
    kls = l_posterior - l_prior
    kl = torch.mean(kls)

    probs = self.dec.decode(z)
    recs = ut.log_bernoulli_with_logits(x, probs)
    rec = -1.0 * torch.mean(recs)

    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    N_batches, dims = x.size()
    x = ut.duplicate(x, iw)

    q_mu, q_var = self.enc.encode(x)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp)

    log_px_z = ut.log_bernoulli_with_logits(x, logits)
    # The importance weights need the per-sample log-ratio log p(z) - log q(z|x)
    # evaluated at z_samp, not the analytic KL (which drops the z dependence).
    log_vals = ut.log_normal(z_samp, torch.zeros_like(q_mu), torch.ones_like(q_var)) \
        - ut.log_normal(z_samp, q_mu, q_var)

    # ut.duplicate tiles whole copies of the batch, so group as (iw, batch).
    log_w = (log_px_z + log_vals).reshape(iw, N_batches)
    niwae = torch.mean(-ut.log_mean_exp(log_w, 0))

    kl = -log_vals.mean()
    rec = -log_px_z.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
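# The reshape fix above depends on `ut.duplicate` stacking whole copies of
# the batch (see the duplicate sketch earlier in this section). A quick
# sanity check of that convention, under the same assumption:
import torch

# With batch=2 and iw=3, duplicate yields [x0, x1, x0, x1, x0, x1], so
# reshape(iw, batch) puts the three copies of each example in one column.
x = torch.tensor([[0.0], [1.0]])
x_dup = x.expand(3, *x.shape).reshape(-1, 1)  # sketch of ut.duplicate(x, 3)
print(x_dup.reshape(3, 2))
# tensor([[0., 1.],
#         [0., 1.],
#         [0., 1.]])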
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    pm, pv = ut.gaussian_parameters(self.z_pre, dim=1)

    # Generate samples.
    qm, qv = self.enc.encode(x)
    z_sample = ut.sample_gaussian(qm, qv)
    rec = self.dec.decode(z_sample)

    # Compute loss.
    # Negative log-likelihood of reproducing the input image given the logits.
    rec = -ut.log_bernoulli_with_logits(x, rec)
    # Monte Carlo KL between the latent distribution and the mixture prior.
    kl = ut.log_normal(z_sample, qm, qv) - ut.log_normal_mixture(z_sample, pm, pv)

    nelbo = (kl + rec).mean()
    # NELBO: 89.24684143066406. KL: 10.346451759338379. Rec: 78.90038299560547
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl.mean(), rec.mean()
def get_rwse(model, full_true_trajs, n_samples=100):
    """
    Root-weighted square error (RWSE) captures the deviation of a model's
    probability mass from real-world trajectories.
    """
    n_seqs = full_true_trajs.shape[1]
    inputs = full_true_trajs[:model.n_input_steps, :, :].detach()
    targets = full_true_trajs[model.n_input_steps:, :, :2].detach()

    mean_sq_errs = []
    if model.BBB:
        for i in range(n_samples):
            # not using sharpening
            pred = model.forward(inputs).detach()
            if not model.constant_var:
                pred = pred[:, :, :-1]
            mean_sq_err = ((targets - pred) ** 2).sum() / n_seqs
            mean_sq_errs.append(mean_sq_err)
    else:
        pred = model.forward(inputs).detach()
        if not model.constant_var:
            mean, var = ut.gaussian_parameters(pred, dim=-1)
        else:
            mean = pred
            var = model.pred_var
        for i in range(n_samples):
            sample_trajs = ut.sample_gaussian(mean, var)
            mean_sq_err = ((targets - sample_trajs) ** 2).sum() / n_seqs
            mean_sq_errs.append(mean_sq_err)

    mean_rwse = torch.stack(mean_sq_errs).mean().sqrt()
    return mean_rwse
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Sample z from the encoder distribution.
    q_m, q_v = self.enc.encode(x)
    z_given_x = ut.sample_gaussian(q_m, q_v)
    decoded_bernoulli_logits = self.dec.decode(z_given_x)

    rec = ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)
    rec = -torch.mean(rec)

    # Standard-normal prior, created on the same device as the posterior.
    p_m, p_v = torch.zeros_like(q_m), torch.ones_like(q_v)
    kl = ut.kl_normal(q_m, q_v, p_m, p_v)
    kl = torch.mean(kl)

    nelbo = rec + kl
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_elbo_bound_for(self, x, x_hat, y, c):
    qm, qv = self.enc.encode(x, y=y)
    # sample z(1) (for a Monte Carlo estimate of p(x|z(1)))
    z = ut.sample_gaussian(qm, qv)
    kl = self.kl_elem(z, qm, qv)

    # decode
    mu, var = self.dec.decode(z, y=y, c=c)
    rec, rec_mse, rec_var = ut.nlog_prob_normal(
        mu=mu, y=x_hat, var=var, fixed_var=self.warmup, var_pen=self.var_pen)

    # reduce
    kl = kl.mean(-1)
    rec, rec_mse, rec_var = rec.mean(-1), rec_mse.mean(-1), rec_var.mean(-1)
    nelbo = kl + rec
    return nelbo, kl, rec, rec_mse, rec_var
def negative_elbo_bound_gumbel(self, x, tau):
    """
    Gumbel-softmax version. Not slated for release.
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = F.softmax(y_logits, dim=1)
    y = ut.gumbel_softmax(y_logits, tau)

    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z, y)

    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim)).mean()
    kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1]).mean()
    rec = -ut.log_bernoulli_with_logits(x, x_logits).mean()
    nelbo = kl_y + kl_z + rec
    return nelbo, kl_z, kl_y, rec
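# `ut.gumbel_softmax` is the one helper specific to this variant. A minimal
# sketch of the standard Gumbel-softmax relaxation it presumably implements;
# the 1e-20 guards are an assumption for numerical safety:
import torch
import torch.nn.functional as F

def gumbel_softmax(logits, tau):
    # Perturb logits with Gumbel(0, 1) noise g = -log(-log(u)), then relax
    # the argmax into a temperature-tau softmax; lower tau -> more discrete.
    u = torch.rand_like(logits)
    g = -torch.log(-torch.log(u + 1e-20) + 1e-20)
    return F.softmax((logits + g) / tau, dim=-1)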
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # To help you start, we have computed the mixture of Gaussians prior
    # prior = (m_mixture, v_mixture) for you, where
    # m_mixture and v_mixture each have shape (1, self.k, self.z_dim)
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # Compute the mixture of Gaussians prior
    prior = ut.gaussian_parameters(self.z_pre, dim=1)

    N_samp, dim = x.size()
    q_mu, q_var = self.enc.encode(x)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp)

    rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits))
    kl = torch.mean(ut.log_normal(z_samp, q_mu, q_var)
                    - ut.log_normal_mixture(z_samp, *prior))
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    # encode
    qm, qv = self.enc.encode(x)

    # replicate qm, qv
    q_shape = list(qm.shape)
    qm = qm.unsqueeze(1).expand(q_shape[0], iw, *q_shape[1:])
    qv = qv.unsqueeze(1).expand(q_shape[0], iw, *q_shape[1:])

    # replicate x
    x_shape = list(x.shape)
    x = x.unsqueeze(1).expand(x_shape[0], iw, *x_shape[1:])

    # sample z(1)...z(iw) (for a Monte Carlo estimate of p(x|z(i)))
    z = ut.sample_gaussian(qm, qv)

    # decode
    mu, var = self.dec.decode(z)
    kl_elem = self.kl_elem(z, qm, qv)
    nll, rec_mse, rec_var = ut.nlog_prob_normal(
        mu=mu, y=x, var=var, fixed_var=self.warmup, var_pen=self.var_pen)
    log_prob, rec_mse, rec_var = -nll, rec_mse.mean(), rec_var.mean()
    niwae = -ut.log_mean_exp(log_prob - kl_elem, dim=1).mean(-1)

    rec = -log_prob.mean(1).mean(-1)
    kl = kl_elem.mean(1).mean(-1)
    return niwae, kl, rec, rec_mse, rec_var
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute niwae (negative IWAE) with iw importance samples, and the KL
    # and Rec decomposition of the Evidence Lower Bound
    #
    # Outputs should all be scalar
    ################################################################################
    niwae = 0
    # Process one example at a time, duplicating it iw times.
    for i in range(x.size()[0]):
        x_i = x[i][:].view(1, x.size()[1])
        x_i = ut.duplicate(x_i, iw)
        m, v = self.enc.encode(x_i)
        z = ut.sample_gaussian(m, v)
        x_hat = self.dec.decode(z)
        exponent = ut.log_bernoulli_with_logits(x_i, x_hat) \
            + ut.log_normal(z, self.z_prior_m.expand(m.size()), self.z_prior_v.expand(v.size())) \
            - ut.log_normal(z, m, v)
        niwae += -ut.log_mean_exp(exponent, 0).squeeze()
    niwae = niwae / x.size()[0]
    kl = rec = torch.tensor(0)
    ################################################################################
    # End of code modification
    ################################################################################
    return niwae, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # 1. Get the latent distribution and one sample.
    m, v = self.enc.encode(x)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z)

    # 2. Get the KL divergence of q(z|x) from p(z)
    #    (the prior on z is a standard Gaussian).
    pz_m, pz_v = self.z_prior[0], self.z_prior[1]
    kl_loss = ut.kl_normal(m, v, pz_m, pz_v)

    # 3. Reconstruction loss, encouraging x_hat to match x.
    r_loss = ut.log_bernoulli_with_logits(x, x_logits)

    nelbo = -1 * (r_loss - kl_loss)
    nelbo, kl, rec = nelbo.mean(), kl_loss.mean(), -1 * r_loss.mean()
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    # The first dimension of m and v is the batch:
    # each input generates its own normal distribution.
    m, v = self.enc.encode(x)
    kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)
    # p(x|z): the logits are decoded from the latent variable z.
    rec = -ut.log_bernoulli_with_logits(x, logits)

    kl = kl.mean()
    rec = rec.mean()
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
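# Every reconstruction term in this section goes through
# `ut.log_bernoulli_with_logits`. A minimal sketch of the assumed behavior,
# reusing PyTorch's numerically stable binary cross-entropy on logits:
import torch.nn.functional as F

def log_bernoulli_with_logits(x, logits):
    # Per-example Bernoulli log-likelihood, summed over the data dimension:
    # log p(x | logits) = -BCEWithLogits(logits, x), reduced over dim -1.
    return -F.binary_cross_entropy_with_logits(logits, x, reduction='none').sum(-1)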
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    m, v = self.enc.encode(x)  # compute the encoder output
    kl = torch.mean(ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v), -1)
    z = ut.sample_gaussian(m, v)  # sample a point from the multivariate Gaussian
    logits = self.dec.decode(z)  # pass the sampled "z" through the decoder
    rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits), -1)  # log-probability of the output

    nelbo = torch.mean(kl + rec)
    kl = torch.mean(kl)
    rec = torch.mean(rec)
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL and Rec decomposition
    #
    # Note that nelbo = kl + rec
    #
    # Outputs should all be scalar
    ################################################################################
    phi = self.enc.encode(x)
    z_hat = ut.sample_gaussian(*phi)
    kl = ut.kl_normal(*phi, *self.z_prior).mean()

    # Mixture-of-Gaussians prior variant:
    # prior = ut.gaussian_parameters(self.z_pre, dim=1)
    # q = self.enc.encode(x)
    # z_hat = ut.sample_gaussian(*q)
    # kl = ut.log_normal(z_hat, *q) - ut.log_normal_mixture(z_hat, *prior)
    # kl = kl.mean()

    rec = -ut.log_bernoulli_with_logits(x, self.dec.decode(z_hat)).mean()
    nelbo = kl + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl, rec
def negative_iwae_bound(self, x, iw):
    """
    Computes the Importance Weighted Autoencoder Bound
    Additionally, we also compute the ELBO KL and reconstruction terms

    Args:
        x: tensor: (batch, dim): Observations
        iw: int: (): Number of importance weighted samples

    Returns:
        niwae: tensor: (): Negative IWAE bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    m, v = self.enc.encode(x)

    # Duplicate
    m = ut.duplicate(m, iw)
    v = ut.duplicate(v, iw)
    x = ut.duplicate(x, iw)

    z = ut.sample_gaussian(m, v)
    logits = self.dec.decode(z)

    # KL and Rec terms of the ELBO, reduced to scalars for reporting.
    pm = torch.zeros_like(m)
    pv = torch.ones_like(v)
    kl = ut.kl_normal(m, v, pm, pv).mean()
    log_px_z = ut.log_bernoulli_with_logits(x, logits)
    rec = -log_px_z.mean()

    # Importance weights use the per-sample log-ratio log p(z) - log q(z|x),
    # not the analytic KL; log-mean-exp over the iw copies, then average.
    log_w = log_px_z + ut.log_normal(z, pm, pv) - ut.log_normal(z, m, v)
    niwae = -ut.log_mean_exp(log_w.reshape(iw, -1), dim=0).mean()
    return niwae, kl, rec
def Encoder(self, x):
    # --- deterministic upward pass ---
    l_enc_a0, mu_up0, var_up0 = self.MLP1.encode(x)  # first level
    # encoder layer 1 mu, var
    _, qmu1, qvar1 = self.MLP2.encode(l_enc_a0)  # second level

    # --- stochastic downward pass ---
    # sample a z on top
    z_down = ut.sample_gaussian(qmu1, qvar1)
    # partially downwards
    _, mu_dn0, var_dn0 = self.MLP3.encode(z_down)

    # compute new mu, sigma at the first level, as per the paper
    prec_up0 = var_up0 ** (-1)
    prec_dn0 = var_dn0 ** (-1)
    # encoder layer 0 mu, var (precision-weighted combination)
    qmu0 = (mu_up0 * prec_up0 + mu_dn0 * prec_dn0) / (prec_up0 + prec_dn0)
    qvar0 = (prec_up0 + prec_dn0) ** (-1)
    return z_down, qmu0, qvar0, qmu1, qvar1