Example #1
    def negative_elbo_bound(self, x, y):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################

        if self.CNN:
            m_xy, v_xy = self.enc_xy.encode_xy(x, y)
            m_x, v_x = self.enc_x.encode_x(x)
            m_y, v_y = self.enc_y.encode_y(y)
        else:
            m_xy, v_xy = self.enc_xy.encode(x, y)
            m_x, v_x = self.enc_x.encode(x)
            m_y, v_y = self.enc_y.encode(y)

        # kl divergence for latent variable z
        kl_xy_x = ut.kl_normal(m_xy, v_xy, m_x, v_x)
        kl_xy_y = ut.kl_normal(m_xy, v_xy, m_y, v_y)

        # reconstruction error
        z = ut.sample_gaussian(m_xy, v_xy)
        x_logits = self.dec.decode(z)

        if self.CNN:
            x = torch.reshape(x, (x.shape[0], -1))
        rec = -ut.log_bernoulli_with_logits(x, x_logits)

        kl_xy_x = kl_xy_x.mean()
        kl_xy_y = kl_xy_y.mean()
        rec = rec.mean()
        nelbo = kl_xy_x * self.kl_xy_x_weight + kl_xy_y * self.kl_xy_y_weight + rec * self.rec_weight

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_xy_x, kl_xy_y, rec, m_xy, v_xy
Example #2
    def negative_elbo_bound(self, x, y):
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that we are interested in the ELBO of ln p(x | y)
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        q_mu, q_var = self.enc.encode(x, y)

        z_samp = ut.sample_gaussian(q_mu, q_var)

        logits = self.dec.decode(z_samp, y)

        rec = -torch.mean(
            ut.log_normal(x, logits, 0.1 * torch.ones_like(logits)))

        kl_z = torch.mean(
            ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu),
                         torch.ones_like(q_var)))

        nelbo = kl_z + rec

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, rec
Example #3
    def negative_elbo_bound(self, x, y):
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that we are interested in the ELBO of ln p(x | y)
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################
        m, v = self.enc.encode(x, y)
        z = ut.sample_gaussian(m, v)
        x_m = self.dec.decode(z, y)

        rec = -ut.log_normal(x, x_m, self.x_v.expand(x_m.size())).mean()
        kl_z = ut.kl_normal(m, v,
                self.z_prior_m.expand(m.size()),
                self.z_prior_v.expand(v.size())).mean()

        nelbo = kl_z + rec

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, rec
Example #4
    def negative_elbo_bound(self, x, y):
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that we are interested in the ELBO of ln p(x | y)
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        # sample z
        m, v = self.enc.encode(x, y)
        z = ut.sample_gaussian(m, v)

        # generate x given z,y
        x_logits = self.dec.decode(z, y)

        # kl on q(z)
        kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1])


        rec_loss = -ut.log_normal(x, x_logits, 0.1 * torch.ones_like(x_logits))
        kl_z, rec_loss = kl_z.mean(), rec_loss.mean()
        nelbo = rec_loss + kl_z

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, rec_loss
Example #5
    def negative_elbo_bound(self, x, y):
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that we are interested in the ELBO of ln p(x | y)
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################
        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)

        qm, qv = self.enc.encode(x, y)
        z = ut.sample_gaussian(qm, qv)
        recon_m = self.dec.decode(
            z, y)  # mean: [batch, dim_x]; fixed scale sigma = (1/10) * I
        dist = Normal(loc=recon_m, scale=1 / 10 * torch.ones_like(x))
        log_prob = dist.log_prob(x).sum(dim=1)

        rec = -torch.mean(log_prob, dim=0)
        kl_z = torch.mean(ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v),
                          dim=0)

        nelbo = rec + kl_z
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, rec
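Examples 2 through 5 model x as real-valued, so the reconstruction term is a Gaussian rather than a Bernoulli log-density. A sketch of the quantity they assemble (the symbol sigma^2 here stands for the fixed 0.1 variance / 1/10 scale these snippets hard-code; the decoder output is used as the mean):

$$ \texttt{rec} \;=\; -\,\mathbb{E}_{q(z \mid x, y)}\!\left[\log \mathcal{N}\!\left(x \,;\, \mathrm{dec}(z, y),\ \sigma^2 I\right)\right], \qquad \texttt{nelbo} \;=\; \texttt{kl} + \texttt{rec}. $$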
Example #6
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################
        m, v = self.enc.encode(x)
        z = ut.sample_gaussian(m, v)
        recon_logits = self.dec.decode(z)

        rec = -torch.mean(ut.log_bernoulli_with_logits(x, recon_logits), dim=0)
        kl = torch.mean(ut.kl_normal(m, v, torch.zeros_like(m),
                                     torch.ones_like(v)),
                        dim=0)
        nelbo = rec + kl

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
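For the plain-VAE examples with a Bernoulli decoder, the quantity being assembled is the standard single-sample negative ELBO. As a sketch in the notation of these snippets (q(z|x) = N(m, v) from enc.encode, one sample z drawn with sample_gaussian, everything then averaged over the batch):

$$ \texttt{nelbo}(x) \;=\; \underbrace{D_{\mathrm{KL}}\!\big(q(z \mid x)\,\|\,p(z)\big)}_{\texttt{kl}} \;+\; \underbrace{\big(-\log p(x \mid z)\big)}_{\texttt{rec}}, \qquad z \sim q(z \mid x), $$

which is exactly the kl + rec split the TODO comments describe.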
Example #7
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        m, v = self.enc.encode(x)
        z = ut.sample_gaussian(m, v)
        logits = self.dec.decode(z)

        # TODO: compute the values below

        # The model priors for a VAE are the standard normal mean and variance;
        # *_like keeps them on the same device and dtype as the encoder output
        pm = torch.zeros_like(m)
        pv = torch.ones_like(v)

        # Compute the KL divergence from the calculated m and v given x to the priors
        kl = torch.mean(ut.kl_normal(m, v, pm, pv))

        # Calculate the reconstruction cost of p(x|z) = -log(Bern(x|decoder(z)))
        rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits))

        # Negative ELBO definition
        nelbo = kl + rec

        return nelbo, kl, rec
Example #8
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################
        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

        p_y = 1 / 10 * torch.ones_like(y_prob)
        kl_y = torch.mean(ut.kl_cat(y_prob, y_logprob, torch.log(p_y)), dim=0)

        batch_size = x.shape[0]
        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)
        y = np.repeat(np.arange(self.y_dim), x.size(0))
        y = x.new(np.eye(self.y_dim)[y])
        x = ut.duplicate(x, self.y_dim)

        qm, qv = self.enc.encode(x, y)
        z = ut.sample_gaussian(qm, qv)
        recon_logits = self.dec.decode(z, y)

        p_x_given_yz = ut.log_bernoulli_with_logits(x, recon_logits)
        p_x_given_yz = p_x_given_yz.reshape(self.y_dim, batch_size).transpose(
            0, 1)  #[batch, 10]

        rec = -torch.mean(torch.sum(p_x_given_yz * y_prob, dim=1), dim=0)

        kl_z_over_xy = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v)
        kl_z_over_xy = kl_z_over_xy.reshape(self.y_dim,
                                            batch_size).transpose(0, 1)

        kl_z = torch.mean(torch.sum(kl_z_over_xy * y_prob, dim=1), dim=0)

        nelbo = rec + kl_y + kl_z

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, kl_y, rec
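In the semi-supervised examples (this one and several below), y is unobserved and is marginalized under q(y|x); tiling the batch once per label (0 through 9 in these snippets) is what computes that expectation. A sketch of the resulting decomposition, matching nelbo = kl_y + kl_z + rec:

$$ \texttt{nelbo}(x) \;=\; \underbrace{D_{\mathrm{KL}}\!\big(q(y \mid x)\,\|\,p(y)\big)}_{\texttt{kl\_y}} \;+\; \mathbb{E}_{q(y \mid x)}\Big[\underbrace{D_{\mathrm{KL}}\!\big(q(z \mid x, y)\,\|\,p(z)\big)}_{\texttt{kl\_z}} \;+\; \underbrace{\big(-\log p(x \mid y, z)\big)}_{\texttt{rec}}\Big], $$

where the outer expectation is the q(y|x)-weighted sum over labels that the reshape-and-multiply code performs.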
Example #9
    def negative_elbo_bound(self, x, beta):

        z_given_x, qmu0, qvar0, qmu1, qvar1 = self.Encoder(x)
        decoded_bernoulli_logits, pmu0, pvar0 = self.Decoder(z_given_x)

        rec = ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)
        rec = -torch.mean(rec)

        pm, pv = torch.zeros(qmu1.shape), torch.ones(qvar1.shape)
        kl1 = ut.kl_normal(qmu1, qvar1, pm, pv)
        kl2 = ut.kl_normal(qmu0, qvar0, pmu0, pvar0)
        kl = beta * torch.mean(kl1 + kl2)

        nelbo = rec + kl
        return nelbo, rec, kl
    def loss_encoder(self, x):
        m, v = self.enc.encode(x)
        kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v).mean()
        # nelbo, kl, rec = self.negative_iwae_bound(x, 10)
        loss = kl

        summaries = dict((('gen/kl_z', kl), ))

        return loss, summaries
Example #11
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################
        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = torch.softmax(y_logits, dim=1)

        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)
        y = np.repeat(np.arange(self.y_dim), x.size(0))
        y = x.new(np.eye(self.y_dim)[y])
        x = ut.duplicate(x, self.y_dim)

        # sample z from x and y
        qm, qv = self.enc.encode(x, y)
        z = ut.sample_gaussian(qm, qv)

        # compute kl
        x_logits = self.dec.decode(z, y)
        kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim))
        kl_z = ut.kl_normal(qm, qv, self.z_prior[0], self.z_prior[1])
        rec_loss = -ut.log_bernoulli_with_logits(x, x_logits)

        # (y_dim * batch)
        # Compute the expected reconstruction and kl base on the distribution q(y|x), q(y,z|x)
        rec_loss_y = (y_prob.t() * rec_loss.reshape(self.y_dim, -1)).sum(0)
        kl_z_y = (y_prob.t() * kl_z.reshape(self.y_dim, -1)).sum(0)

        # Reduce to means
        kl_y, kl_z, rec = kl_y.mean(), kl_z_y.mean(), rec_loss_y.mean()
        nelbo = rec + kl_z + kl_y

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, kl_y, rec
Example #12
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################
        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = torch.softmax(y_logprob, dim=1)  # (batch, y_dim)
        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)
        y = np.repeat(np.arange(self.y_dim), x.size(0))
        y = x.new(np.eye(self.y_dim)[y])
        x = ut.duplicate(x, self.y_dim)
        #
        # Generate samples.
        qm, qv = self.enc.encode(x, y)
        z_sample = ut.sample_gaussian(qm, qv)
        xprime = self.dec.decode(z_sample, y)
        #
        # Compute loss.
        y_logprior = torch.log(torch.ones_like(y_logprob) / self.y_dim)
        kl_y = ut.kl_cat(y_prob, y_logprob, y_logprior)
        #
        # Data is duplicated in a way to make the batch dimension second.
        kl_z = ut.kl_normal(qm, qv, self.z_prior_m,
                            self.z_prior_v).view(self.y_dim, -1)
        rec = -ut.log_bernoulli_with_logits(x, xprime).view(self.y_dim, -1)
        #
        # Swap axes so the class probabilities match the new batch dimension,
        # then reduce everything to the scalars the contract asks for.
        kl_z = (y_prob.t() * kl_z).sum(0).mean()
        rec = (y_prob.t() * rec).sum(0).mean()
        kl_y = kl_y.mean()
        nelbo = kl_y + kl_z + rec
        # Test set classification accuracy: 0.8104000091552734
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, kl_y, rec
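The correctness of these semi-supervised snippets hinges on how the duplicated batch is laid out: the tiling repeats the whole batch once per label (label blocks along dim 0), so per-sample quantities come back as (y_dim, batch) and need a transpose. A self-contained sketch of that layout in plain PyTorch, with illustrative names standing in for the course's ut.duplicate and the np.repeat label construction (an assumption consistent with the snippets above, not taken from their utilities):

import torch

batch, y_dim, dim = 3, 4, 5
x = torch.randn(batch, dim)

# Tile the whole batch once per label: rows [0, batch) get label 0, the next batch rows label 1, ...
x_tiled = x.repeat(y_dim, 1)                                  # (y_dim * batch, dim)
y_onehot = torch.eye(y_dim).repeat_interleave(batch, dim=0)   # matching one-hot labels

per_sample = x_tiled.sum(dim=1)                    # stand-in for a per-sample loss, shape (y_dim * batch,)
per_label = per_sample.reshape(y_dim, batch).t()   # (batch, y_dim): entry [i, c] is sample i under label c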
Example #13
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        numSamples = x.size()[0]
        
        # Calculate the KL Divergence term
        # First, find the variational posterior mean and variance
        qm, qv = self.enc.encode(x)

        # Next, note that the marginal for Z is always the standard normal
        pm = torch.zeros([numSamples, self.z_dim], dtype=torch.float)
        pv = torch.ones([numSamples, self.z_dim], dtype=torch.float)
        
        # Now we compute the KL Divergence
        # Divide by numSamples to get the average
        kl = torch.sum(ut.kl_normal(qm, qv, pm, pv)) / numSamples

        # Approximate the reconstruction term
        # First, sample from the variational posterior
        zSample = ut.sample_gaussian(qm, qv)

        # Next, we pass the sample through the decoder to get
        # parameters for the pixel Bernoullis
        bernoulliParams = self.dec.decode(zSample)

        # Now create the approximation
        logProbForEachSample = ut.log_bernoulli_with_logits(x, bernoulliParams)
        rec = -1 * torch.sum(logProbForEachSample) / numSamples
  
        # nelbo is just kl + rec
        nelbo = kl + rec
        ################################################################################

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #14
    def negative_iwae_bound(self, x, iw):
        """
        Computes the Importance Weighted Autoencoder Bound
        Additionally, we compute the ELBO KL and reconstruction terms

        Args:
            x: tensor: (batch, dim): Observations
            iw: int: (): Number of importance weighted samples

        Returns:
            niwae: tensor: (): Negative IWAE bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        m, v = self.enc.encode(x)

        # Duplicate
        m = ut.duplicate(m, iw)
        v = ut.duplicate(v, iw)
        x = ut.duplicate(x, iw)
        z = ut.sample_gaussian(m, v)
        logits = self.dec.decode(z)

        # TODO: compute the values below

        # ELBO-style KL and Rec terms over the iw-times duplicated batch
        pm = torch.zeros_like(m)
        pv = torch.ones_like(v)
        kl = ut.kl_normal(m, v, pm, pv)
        rec = -ut.log_bernoulli_with_logits(x, logits)

        # Per-sample importance weight: log p(x|z) plus log p(z) - log q(z|x),
        # approximated here by the negative analytic KL (as the other IWAE snippets do),
        # then log-mean-exp over the iw duplicated samples (duplication tiles the batch in iw blocks)
        log_w = ut.log_bernoulli_with_logits(x, logits) - kl
        niwae = -torch.mean(ut.log_mean_exp(log_w.reshape(iw, -1), dim=0))

        kl = torch.mean(kl)
        rec = torch.mean(rec)

        return niwae, kl, rec
Example #15
    def negative_iwae_bound(self, x, iw):
        """
        Computes the Importance Weighted Autoencoder Bound
        Additionally, we compute the ELBO KL and reconstruction terms

        Args:
            x: tensor: (batch, dim): Observations
            iw: int: (): Number of importance weighted samples

        Returns:
            niwae: tensor: (): Negative IWAE bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute niwae (negative IWAE) with iw importance samples, and the KL
        # and Rec decomposition of the Evidence Lower Bound
        #
        # Outputs should all be scalar
        ################################################################################
        m, v = self.enc.encode(x)

        dist = Normal(loc=m, scale=torch.sqrt(v))
        z_sample = dist.rsample(sample_shape=torch.Size([iw]))
        log_batch_z_sample = []
        kl_batch_z_sample = []

        for i in range(iw):
            recon_logits = self.dec.decode(z_sample[i])
            log_batch_z_sample.append(
                ut.log_bernoulli_with_logits(
                    x, recon_logits))  # [batch, z_sample]
            kl_batch_z_sample.append(
                ut.kl_normal(m, v, torch.zeros_like(m), torch.ones_like(v)))

        log_batch_z_sample = torch.stack(log_batch_z_sample, dim=1)
        kl_batch_z_sample = torch.stack(kl_batch_z_sample, dim=1)

        niwae = -ut.log_mean_exp(log_batch_z_sample - kl_batch_z_sample,
                                 dim=1).mean(dim=0)

        rec = -torch.mean(log_batch_z_sample)  # average over batch and importance samples
        kl = torch.mean(kl_batch_z_sample)

        ################################################################################
        # End of code modification
        ################################################################################
        return niwae, kl, rec
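The negative_iwae_bound examples estimate the importance weighted bound. A sketch of the target quantity, with z_1..z_iw drawn from q(z|x):

$$ \texttt{niwae}(x) \;=\; -\,\mathbb{E}\left[\log \frac{1}{\texttt{iw}} \sum_{i=1}^{\texttt{iw}} \exp\Big(\log p(x \mid z_i) + \log p(z_i) - \log q(z_i \mid x)\Big)\right], \qquad z_i \sim q(z \mid x). $$

Note that this example and a few of the others substitute the analytic KL(q(z|x) || p(z)) for the per-sample term log p(z_i) - log q(z_i|x), whereas Example #26 below uses the exact log-density form.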
Example #16
    def negative_iwae_bound(self, x, iw):
        """
        Computes the Importance Weighted Autoencoder Bound
        Additionally, we compute the ELBO KL and reconstruction terms

        Args:
            x: tensor: (batch, dim): Observations
            iw: int: (): Number of importance weighted samples

        Returns:
            niwae: tensor: (): Negative IWAE bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute niwae (negative IWAE) with iw importance samples, and the KL
        # and Rec decomposition of the Evidence Lower Bound
        #
        # Outputs should all be scalar
        ################################################################################

        N_batches, dims = x.size()

        x = ut.duplicate(x, iw)

        q_mu, q_var = self.enc.encode(x)

        z_samp = ut.sample_gaussian(q_mu, q_var)

        logits = self.dec.decode(z_samp)

        probs = ut.log_bernoulli_with_logits(x, logits)

        log_vals = -ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu), torch.ones_like(q_var))
        # log_vals = ut.log_normal(z_samp, torch.zeros_like(q_mu), torch.ones_like(q_var)) - ut.log_normal(z_samp, q_mu, q_var)

        # ut.duplicate tiles the batch iw times along dim 0, so unflatten as (iw, batch)
        log_w = probs + log_vals

        niwae = torch.mean(-ut.log_mean_exp(log_w.reshape(iw, N_batches), 0))

        # ELBO-style KL and reconstruction terms, averaged over all iw * batch samples
        kl = torch.mean(-log_vals)
        rec = torch.mean(-probs)

        ################################################################################
        # End of code modification
        ################################################################################
        return niwae, kl, rec
Example #17
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        #sample z from encoder distribution
        q_m, q_v = self.enc.encode(x)
        z_given_x = ut.sample_gaussian(q_m, q_v)
        decoded_bernoulli_logits = self.dec.decode(z_given_x)
        rec = ut.log_bernoulli_with_logits(x, decoded_bernoulli_logits)
        rec = -torch.mean(rec)

        p_m, p_v = torch.zeros(q_m.shape), torch.ones(q_v.shape)

        kl = ut.kl_normal(q_m, q_v, p_m, p_v)
        kl = torch.mean(kl)

        nelbo = rec + kl

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #18
    def negative_elbo_bound_gumbel(self, x, tau):
        """
        Gumbel-softmax version. Not slated for release.
        """
        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = F.softmax(y_logits, dim=1)
        y = ut.gumbel_softmax(y_logits, tau)

        m, v = self.enc.encode(x, y)
        z = ut.sample_gaussian(m, v)
        x_logits = self.dec.decode(z, y)

        kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim)).mean()
        kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1]).mean()
        rec = -ut.log_bernoulli_with_logits(x, x_logits).mean()
        nelbo = kl_y + kl_z + rec
        return nelbo, kl_z, kl_y, rec
Example #19
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        # 1. get latent distribution and one sample.
        m, v = self.enc.encode(x)
        z = ut.sample_gaussian(m, v)

        x_logits = self.dec.decode(z)

        # 2. get the KL divergence between q(z|x) and p(z) (assume z follows a standard Gaussian prior)
        pz_m, pz_v = self.z_prior[0], self.z_prior[1]
        kl_loss = ut.kl_normal(m, v, pz_m, pz_v)

        # 3. reconstruction loss, encourage x_hat to match x
        r_loss = ut.log_bernoulli_with_logits(x, x_logits)
        nelbo = -1 * (r_loss - kl_loss)
        nelbo, kl, rec = nelbo.mean(), kl_loss.mean(), -1 * r_loss.mean()

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        # the first dimension of m and v is batch
        # each input data generate a normal distribution
        m, v = self.enc.encode(x)
        kl = ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v)

        z = ut.sample_gaussian(m, v)
        logits = self.dec.decode(z)

        # get p(x|z) since logit is from latent variable z
        rec = -ut.log_bernoulli_with_logits(x, logits)
        kl = kl.mean()
        rec = rec.mean()
        nelbo = kl + rec

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #21
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################

        (m, v) = self.enc.encode(x)  # compute the encoder output

        kl = torch.mean(ut.kl_normal(m, v, self.z_prior_m, self.z_prior_v), -1)

        z = ut.sample_gaussian(
            m, v)  #sample a point from the multivariate Gaussian
        logits = self.dec.decode(z)  #pass the sampled "Z" through the decoder

        rec = -torch.mean(ut.log_bernoulli_with_logits(x, logits),
                          -1)  #Calculate log Prob of the output

        nelbo = torch.mean(kl + rec)
        kl = torch.mean(kl)
        rec = torch.mean(rec)
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #22
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################
        phi = self.enc.encode(x)
        z_hat = ut.sample_gaussian(*phi)

        kl = ut.kl_normal(*phi, *self.z_prior).mean()
        # prior = ut.gaussian_parameters(self.z_pre, dim=1)
        #
        # q = self.enc.encode(x)
        # z_hat = ut.sample_gaussian(*q)
        #
        # kl = ut.log_normal(z_hat, *q) - ut.log_normal_mixture(z_hat, *prior)
        # kl = kl.mean()

        rec = -ut.log_bernoulli_with_logits(x, self.dec.decode(z_hat)).mean()
        nelbo = kl + rec
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #23
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL and, Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        ################################################################################
        qm, qv = self.enc.encode(x)
        pm = self.z_prior[0].expand(qm.shape)
        pv = self.z_prior[1].expand(qv.shape)
        kls = ut.kl_normal(qm, qv, pm, pv)
        kl = torch.mean(kls)

        z = ut.sample_gaussian(qm, qv)
        probs = self.dec.decode(z)
        recs = ut.log_bernoulli_with_logits(x, probs)
        rec = -1.0 * torch.mean(recs)

        nelbo = kl + rec
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
Example #24
    def kl_elem(self, z, qm, qv):
        kl_elem = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v)
        return kl_elem
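Every example here leans on ut.kl_normal for the kl term. For diagonal Gaussians this has the standard closed form (a textbook identity, not copied from the course utilities, which presumably implement the element-wise version summed over the latent dimensions):

$$ D_{\mathrm{KL}}\big(\mathcal{N}(m, \mathrm{diag}(v)) \,\|\, \mathcal{N}(p_m, \mathrm{diag}(p_v))\big) \;=\; \frac{1}{2} \sum_{j} \left( \log\frac{p_{v,j}}{v_j} + \frac{v_j}{p_{v,j}} + \frac{(m_j - p_{m,j})^2}{p_{v,j}} - 1 \right). $$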
Example #25
    def negative_iwae_bound(self, x, iw):
        """
        Computes the Importance Weighted Autoencoder Bound
        Additionally, we compute the ELBO KL and reconstruction terms

        Args:
            x: tensor: (batch, dim): Observations
            iw: int: (): Number of importance weighted samples

        Returns:
            niwae: tensor: (): Negative IWAE bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute niwae (negative IWAE) with iw importance samples, and the KL
        # and Rec decomposition of the Evidence Lower Bound
        #
        # Outputs should all be scalar
        ################################################################################
        # m, v = self.enc.encode(x)
        #
        # # expand m to iw samples
        # m_iw = ut.duplicate(m, iw)
        # v_iw = ut.duplicate(v, iw)
        # x_iw = ut.duplicate(x, iw)
        #
        # # sample z [iw]
        # z = ut.sample_gaussian(m_iw, v_iw)
        # x_logits = self.dec.decode(z)
        #
        # # reconstruct loss
        # rec_loss = -ut.log_bernoulli_with_logits(x_iw, x_logits)
        #
        # # kl
        # kl = ut.log_normal(z, m, v) - ut.log_normal(z, self.z_prior[0], self.z_prior[1])
        #
        # # iw nelbo
        # nelbo = kl + rec_loss
        #
        # niwae = -ut.log_mean_exp(-nelbo.reshape(iw, -1), dim=0)
        # niwae, kl, rec = niwae.mean(), kl.mean(), rec_loss.mean()

        m, v = self.enc.encode(x)

        dist = Normal(loc=m, scale=torch.sqrt(v))
        z_iw = dist.rsample(sample_shape=torch.Size([iw]))

        log_z_batch, kl_z_batch = [], []

        # for each z sample
        for i in range(iw):
            recon_logits = self.dec.decode(z_iw[i])
            log_z_batch.append(ut.log_bernoulli_with_logits(x, recon_logits))  # [batch, z_sample]
            kl_z_batch.append(ut.kl_normal(m, v, torch.zeros_like(m), torch.ones_like(v)))

        # aggregate result together
        log_z = torch.stack(log_z_batch, dim=1)
        kl_z = torch.stack(kl_z_batch, dim=1)

        niwae = -ut.log_mean_exp(log_z - kl_z, dim=1).mean(dim=0)

        rec_loss = -torch.mean(log_z)  # average over batch and importance samples
        kl = torch.mean(kl_z)

        ################################################################################
        # End of code modification
        ################################################################################
        return niwae, kl, rec_loss
Example #26
    def negative_iwae_bound(self, x, iw):
        """
        Computes the Importance Weighted Autoencoder Bound
        Additionally, we compute the ELBO KL and reconstruction terms

        Args:
            x: tensor: (batch, dim): Observations
            iw: int: (): Number of importance weighted samples

        Returns:
            niwae: tensor: (): Negative IWAE bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute niwae (negative IWAE) with iw importance samples, and the KL
        # and Rec decomposition of the Evidence Lower Bound
        #
        # Outputs should all be scalar
        ################################################################################

        batch = x.shape[0]
        multi_x = ut.duplicate(x, iw)

        qm, qv = self.enc.encode(x)
        multi_qm = ut.duplicate(qm, iw)
        multi_qv = ut.duplicate(qv, iw)

        # z will be (batch*iw x z_dim)
        # with sampled z's for a given x non-contiguous!
        z = ut.sample_gaussian(multi_qm, multi_qv)

        probs = self.dec.decode(z)
        recs = ut.log_bernoulli_with_logits(multi_x, probs)
        rec = -1.0 * torch.mean(recs)

        multi_pm = self.z_prior[0].expand(multi_qm.shape)
        multi_pv = self.z_prior[1].expand(multi_qv.shape)

        z_priors = ut.log_normal(z, multi_pm, multi_pv)
        x_posteriors = recs
        z_posteriors = ut.log_normal(z, multi_qm, multi_qv)

        log_ratios = z_priors + x_posteriors - z_posteriors
        # Shape (iw*batch,); the ratios for a given x are non-contiguous

        unflat_log_ratios = log_ratios.reshape(iw, batch)

        niwaes = ut.log_mean_exp(unflat_log_ratios, 0)
        niwae = -1.0 * torch.mean(niwaes)

        pm = self.z_prior[0].expand(qm.shape)
        pv = self.z_prior[1].expand(qv.shape)
        kls = ut.kl_normal(qm, qv, pm, pv)
        kl = torch.mean(kls)

        ################################################################################
        # End of code modification
        ################################################################################
        return niwae, kl, rec
Example #27
    def negative_elbo_bound(self,
                            x,
                            label,
                            mask=None,
                            sample=False,
                            adj=None,
                            lambdav=0.001):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        assert label.size()[1] == self.z1_dim

        q_m, q_v = self.enc.encode(x.to(device))
        q_m, q_v = q_m.reshape([q_m.size()[0], self.z1_dim, self.z2_dim
                                ]), torch.ones(q_m.size()[0], self.z1_dim,
                                               self.z2_dim).to(device)

        decode_m, decode_v = self.dag.calculate_dag(
            q_m.to(device),
            torch.ones(q_m.size()[0], self.z1_dim, self.z2_dim).to(device))
        decode_m, decode_v = decode_m.reshape(
            [q_m.size()[0], self.z1_dim, self.z2_dim]), decode_v
        if not sample:
            if mask is not None and mask in [0, 1, 3]:
                z_mask = torch.ones(q_m.size()[0], self.z1_dim,
                                    self.z2_dim).to(device) * adj
                decode_m[:, mask, :] = z_mask[:, mask, :]
                decode_v[:, mask, :] = z_mask[:, mask, :]
            m_zm, m_zv = self.dag.mask_z(decode_m.to(device)).reshape([
                q_m.size()[0], self.z1_dim, self.z2_dim
            ]), decode_v.reshape([q_m.size()[0], self.z1_dim, self.z2_dim])
            m_u = self.dag.mask_u(label.to(device))
            #mask

            f_z = self.mask_z.mix(m_zm).reshape(
                [q_m.size()[0], self.z1_dim, self.z2_dim]).to(device)

            e_tilde = self.attn.attention(
                decode_m.reshape([q_m.size()[0], self.z1_dim,
                                  self.z2_dim]).to(device),
                q_m.reshape([q_m.size()[0], self.z1_dim,
                             self.z2_dim]).to(device))[0]

            f_z1 = f_z + e_tilde
            if mask is not None and mask == 2:
                z_mask = torch.ones(q_m.size()[0], self.z1_dim,
                                    self.z2_dim).to(device) * adj
                f_z1[:, mask, :] = z_mask[:, mask, :]
                m_zv[:, mask, :] = z_mask[:, mask, :]
            g_u = self.mask_u.mix(m_u).to(device)

            m_zv = torch.ones([q_m.size()[0], self.z1_dim,
                               self.z2_dim]).to(device)
            z_given_dag = ut.conditional_sample_gaussian(f_z1, q_v * lambdav)

        decoded_bernoulli_logits, x1, x2, x3, x4 = self.dec.decode_sep(
            z_given_dag.reshape([z_given_dag.size()[0], self.z_dim]),
            label.to(device))

        rec = ut.log_bernoulli_with_logits(
            x, decoded_bernoulli_logits.reshape(x.size()))
        rec = -torch.mean(rec)

        p_m, p_v = torch.zeros(q_m.size()), torch.ones(q_m.size())
        cp_m, cp_v = ut.condition_prior(self.scale, label, self.z2_dim)

        cp_v = torch.ones([q_m.size()[0], self.z1_dim, self.z2_dim]).to(device)
        cp_z = ut.conditional_sample_gaussian(cp_m.to(device), cp_v.to(device))
        kl = torch.zeros(1).to(device)
        kl = 0.3 * ut.kl_normal(
            q_m.view(-1, self.z_dim).to(device),
            q_v.view(-1, self.z_dim).to(device),
            p_m.view(-1, self.z_dim).to(device),
            p_v.view(-1, self.z_dim).to(device))

        for i in range(self.z1_dim):
            kl = kl + 1 * ut.kl_normal(
                decode_m[:, i, :].to(device), cp_v[:, i, :].to(device),
                cp_m[:, i, :].to(device), cp_v[:, i, :].to(device))
        kl = torch.mean(kl)
        mask_kl = torch.zeros(1).to(device)
        mask_kl2 = torch.zeros(1).to(device)
        for i in range(self.z1_dim):
            mask_kl = mask_kl + 1 * ut.kl_normal(
                f_z1[:, i, :].to(device), cp_v[:, i, :].to(device),
                cp_m[:, i, :].to(device), cp_v[:, i, :].to(device))
        u_loss = torch.nn.MSELoss()
        mask_l = torch.mean(mask_kl) + u_loss(g_u, label.float().to(device))
        nelbo = rec + kl + mask_l
        return nelbo, kl, rec, decoded_bernoulli_logits.reshape(
            x.size()), z_given_dag
Example #28
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = torch.softmax(y_logprob, dim=1) # (batch, y_dim)

        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)
        y = np.repeat(np.arange(self.y_dim), x.size(0))
        y = x.new(np.eye(self.y_dim)[y]) #1000,10. 0,100,200 dupe
        x = ut.duplicate(x, self.y_dim) #1000,784. 0,100,200 dupe

        #100x10
        y_prior = torch.tensor([1.0 / self.y_dim]).expand_as(y_prob).to(device)
        y_logprior = torch.log(y_prior)
        #(batch size,)
        kl_ys = ut.kl_cat(y_prob, y_logprob, y_logprior)
        kl_y = torch.mean(kl_ys)


        #1000 x 64. Still 0,100,200 corresponding...
        zqm, zqv = self.enc.encode(x, y)
        zpm = self.z_prior_m.expand_as(zqm)
        zpv = self.z_prior_v.expand_as(zqv)

        #so the zpm, zpv go as x quickly, y slowly
        #equivalent to y being the 0th dimension

        #(batch_size * y_dim,)
        kl_zs_flat = ut.kl_normal(zqm, zqv, zpm, zpv)
        kl_zs = kl_zs_flat.reshape(self.y_dim, -1).t()
        kl_zs_weighted = kl_zs * y_prob
        batch_kl_zs = kl_zs_weighted.sum(1)
        kl_z = batch_kl_zs.mean()

        #1000 x 64
        z = ut.sample_gaussian(zqm, zqv)

        #1000 x 784
        probs = self.dec.decode(z, y)
        #(batch_size * y_dim,)
        recs_flat = -1.0 * ut.log_bernoulli_with_logits(x, probs)
        recs = recs_flat.reshape(self.y_dim, -1).t()
        recs_weighted = recs * y_prob
        batch_recs = recs_weighted.sum(1)
        rec = batch_recs.mean()

        nelbos = kl_ys + batch_kl_zs + batch_recs
        nelbo = torch.mean(nelbos)


        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, kl_y, rec
Example #29
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
        #
        # To assist you in the vectorization of the summation over y, we have
        # the computation of q(y | x) and some tensor tiling code for you.
        #
        # Note that nelbo = kl_z + kl_y + rec
        #
        # Outputs should all be scalar
        ################################################################################
        N_batches, dims = x.size()

        y_logits = self.cls.classify(x)
        y_logprob = F.log_softmax(y_logits, dim=1)
        y_prob = torch.softmax(y_logprob, dim=1)  # (batch, y_dim)

        # Duplicate y based on x's batch size. Then duplicate x
        # This enumerates all possible combination of x with labels (0, 1, ..., 9)
        y = np.repeat(np.arange(self.y_dim), x.size(0))
        y = x.new(np.eye(self.y_dim)[y])
        x = ut.duplicate(x, self.y_dim)

        q_mu, q_var = self.enc.encode(x, y)

        z_samp = ut.sample_gaussian(q_mu, q_var)

        logits = self.dec.decode(z_samp, y)

        rec_ls = -ut.log_bernoulli_with_logits(x, logits)

        # rec_ls is laid out in y_dim blocks of the batch, so unflatten as (y_dim, batch) and transpose
        rec = torch.mean(
            torch.sum(y_prob * rec_ls.reshape(self.y_dim, -1).t(), dim=1))

        kl_y = torch.mean(
            ut.kl_cat(y_prob, y_logprob,
                      torch.log(torch.ones_like(y_prob) / self.y_dim)))

        kl_z_ls = ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu),
                               torch.ones_like(q_var))

        kl_z = torch.mean(
            torch.sum(y_prob * kl_z_ls.reshape(self.y_dim, -1).t(), dim=1))

        nelbo = kl_z + kl_y + rec
        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl_z, kl_y, rec
Example #30
    def negative_elbo_bound(self, x):
        """
        Computes the Evidence Lower Bound, KL, and Reconstruction costs

        Args:
            x: tensor: (batch, dim): Observations

        Returns:
            nelbo: tensor: (): Negative evidence lower bound
            kl: tensor: (): ELBO KL divergence to prior
            rec: tensor: (): ELBO Reconstruction term
        """
        ################################################################################
        # TODO: Modify/complete the code here
        # Compute negative Evidence Lower Bound and its KL and Rec decomposition
        #
        # Note that nelbo = kl + rec
        #
        # Outputs should all be scalar
        batch_size = x.size()[0]

        qm, qv = self.enc.encode(x)
        z_samp = ut.sample_gaussian(qm, qv)
        xhat = self.dec.decode(z_samp)

        pm = torch.zeros([batch_size, self.z_dim],
                         dtype=torch.float,
                         requires_grad=True)
        pv = torch.ones(batch_size, self.z_dim, requires_grad=True)

        kl = ut.kl_normal(
            qm, qv, pm, pv
        )  # requires the same dimensions; KL(q || p) between two normal distributions
        rec = -ut.log_bernoulli_with_logits(x, xhat)

        kl = torch.sum(kl) / batch_size
        rec = torch.sum(rec) / batch_size
        nelbo = kl + rec

        # =========================================================================================================================
        # =========================================================================================================================
        # =========================================================================================================================
        # allkl = []
        # allrec = []
        # for xi in x:
        #     # dim of input xi = 784
        #     xi = torch.reshape(xi, (1, 784))   # reshape to (1, 784)
        #     qm, qv = self.enc.encode(xi)
        #     z_samp = ut.sample_gaussian(qm, qv)
        #
        #     xhat = self.dec.decode(z_samp)
        #
        #     pm = torch.zeros([1, self.z_dim], dtype=torch.float)
        #     pv = torch.ones(self.z_dim)
        #
        #     kli = ut.kl_normal(qm, qv, pm, pv)   # require same dimension here, two normal distributions KL(q || p)
        #     reci = - ut.log_bernoulli_with_logits(xi, xhat)
        #     # print(kli.item(), reci.item())
        #
        #     allkl.append(kli.item())
        #     allrec.append(reci.item())
        #
        # kl = sum(allkl)/len(allkl)
        # rec = sum(allrec)/len(allrec)

        # nelbo = kl + rec

        ################################################################################

        ################################################################################
        # End of code modification
        ################################################################################
        return nelbo, kl, rec
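Finally, a minimal sketch of how a negative_elbo_bound like the unconditional ones above is typically driven during training. The names model, loader, and the flattening step are hypothetical stand-ins, not part of any of the source snippets, and the loop assumes the unconditional signature negative_elbo_bound(x):

import torch

def train(model, loader, epochs=10, lr=1e-3, device="cpu"):
    # Minimize the scalar negative ELBO returned by model.negative_elbo_bound(x).
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    model.to(device).train()
    for _ in range(epochs):
        for x, _ in loader:
            x = x.to(device).view(x.size(0), -1)      # flatten to (batch, dim)
            nelbo, kl, rec = model.negative_elbo_bound(x)
            opt.zero_grad()
            nelbo.backward()
            opt.step()
    return model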