Example #1
def L(self, x, y, z, const=False):
    # Per-example loss: weightNet maps the switch vector z to weights f,
    # apply_f applies those weights to x, and the output is scored against y
    # with an element-wise loss.
    f = self.weightNet(z)
    o = self.apply_f(f, x)
    res = self.elementwise_loss(o, y)
    if const:
        # Return a detached copy so no gradient flows through this value.
        return var2constvar(res)
    return res
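A note on the helpers these snippets rely on: var2constvar, to_var, and to_np are not shown anywhere in the examples. A minimal sketch of what such helpers typically look like in old-style (Variable-era) PyTorch is below; the implementations are assumptions for readability, not the original code.

import torch
from torch.autograd import Variable

def var2constvar(v):
    # Assumed helper: rebuild a Variable from its raw data so autograd
    # treats the value as a constant (no gradient flows through it).
    return Variable(v.data)

def to_var(t):
    # Assumed helper: wrap a plain tensor as a Variable.
    return Variable(t)

def to_np(v):
    # Assumed helper: pull the underlying numpy array out of a Variable.
    return v.data.cpu().numpy()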
Example #2
    def backward(self, x, y, sample=False, n_samples=30):

        n = x.size(0)

        # Detached marginal log p(z), broadcast to the batch, and the
        # per-example posterior log p(z|x) from the switch net.
        log_p_z = torch.log(torch.clamp(self.p_z(x, const=True), 1e-10, 1))
        log_p_z = log_p_z.expand(n, log_p_z.size(0))
        log_p_z_x = self.switchNet(x)

        # Reuse a cached switch sample when one is provided, otherwise draw a new one.
        if self.z is not None:
            samplez = self.z
        else:
            samplez = self.sampleZ(x)
        
        # Empirical estimate of p(y|z) from the current batch (binary labels -1/+1).
        p_y_z = torch.ones((2, self.switch_size))
        zs = to_np(self.sampleZ(x)).argmax(1)
        for z in range(self.switch_size):
            y_given_z = to_np(y)[zs == z]
            for i, label in enumerate([-1, 1]):
                p_y_z[i, z] = float((y_given_z == label).sum())
                if y_given_z.shape[0] > 0:
                    p_y_z[i, z] /= y_given_z.shape[0]

        switch_cost = 0
        weight_cost = 0

        if sample:
            # Monte Carlo estimate over sampled z; this path is currently disabled.
            raise NotImplementedError
            for i in range(n_samples):
                z = samplez

                # switch net: E_z|x (L(x, y, z)
                # - a * log p(z) - a
                # + b * sum_y p(y|z) log p(y|z))
                # * d log p(z|x) / d theta
                data_loss = self.L(x, y, z)
                z_entropy_loss = - (log_p_z*z).sum(1) - 1

                # assume binary problem
                y_entropy_loss = 0
                for y_query in [0, 1]:
                    pyz = p_y_z[y_query].expand(n, self.switch_size)
                    pyz = (to_var(pyz) * z).sum(1)
                    y_entropy_loss += pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                c =  var2constvar(data_loss) + \
                     self.alpha * z_entropy_loss + \
                     self.beta * y_entropy_loss
                derivative = (log_p_z_x * z).sum(1)
                switch_cost += c * derivative

                # weight net: E_z|x d L(x, y, z) / d theta
                weight_cost += data_loss

            switch_cost /= n_samples
            switch_cost.mean().backward()

            weight_cost /= n_samples
            weight_cost.mean().backward()
        else:
            # Exact expectation: enumerate every one-hot switch vector z and
            # weight each term by the detached posterior p(z|x).
            _z_entropy_loss = 0
            _y_entropy_loss = 0

            p_z_x = to_var(torch.exp(log_p_z_x).data)
            for i in range(self.switch_size):
                z = np.zeros(self.switch_size)
                z[i] = 1
                z = to_var(torch.from_numpy(z).float()).expand(n, self.switch_size)

                # switch net: E_z|x (L(x, y, z)
                # - a * log p(z) - a
                # + b * sum_y p(y|z) log p(y|z))
                # * d log p(z|x) / d theta
                data_loss = self.L(x, y, z)
                z_entropy_loss = - (log_p_z*z).sum(1) - 1
                # assume binary problem
                y_entropy_loss = 0
                for y_query in [0, 1]:
                    pyz = p_y_z[y_query].expand(n, self.switch_size)
                    pyz = (to_var(pyz) * z).sum(1)
                    y_entropy_loss -= pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                c =  var2constvar(data_loss) + \
                     self.alpha * z_entropy_loss - \
                     self.beta * y_entropy_loss
                derivative = (log_p_z_x * z).sum(1)
                switch_cost += p_z_x[:, i] * c * derivative

                # weight net: E_z|x d L(x, y, z) / d theta
                weight_cost += p_z_x[:, i] * data_loss

                # Logged statistics: the +1 removes the constant baseline so the value is the entropy term itself.
                _z_entropy_loss += p_z_x[:, i] * (z_entropy_loss + 1)
                _y_entropy_loss += p_z_x[:, i] * y_entropy_loss 

            # Update the switch net and the weight net at their own frequencies.
            if self.count % self.switch_update_every == 0:
                switch_cost.mean().backward()
            if self.count % self.weight_update_every == 0:
                weight_cost.mean().backward()

            if self.print_every != 0 and self.count % self.print_every == 0:
                hz = _z_entropy_loss.mean().data.item()
                hyz = _y_entropy_loss.mean().data.item()
                
                self.writer.add_scalar('loss/z_entropy',
                                       hz,
                                       self.count)
                self.writer.add_scalar('loss/y_given_z_entropy',
                                       hyz,
                                       self.count)
                self.writer.add_scalar('loss/y_z_entropy',
                                       hz + hyz,
                                       self.count)
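The else branch above computes a score-function (REINFORCE-style) gradient exactly, by enumerating every one-hot z and weighting the surrogate c * d log p(z|x) / d theta by the detached posterior p(z|x). A minimal standalone sketch of that pattern follows; the sizes, the placeholder cost table, and the use of current-style PyTorch tensors instead of Variables are illustrative assumptions, not the original code.

import torch

torch.manual_seed(0)

n, K, d = 8, 4, 5                       # hypothetical batch size, switch size, input dim
x = torch.randn(n, d)
switch_net = torch.nn.Linear(d, K)      # stands in for self.switchNet

log_p_z_x = torch.log_softmax(switch_net(x), dim=1)   # log p(z|x)
p_z_x = log_p_z_x.exp().detach()                      # detached weights for the expectation

# Placeholder per-example cost c(x, y, z), detached like var2constvar(data_loss).
cost_table = torch.rand(n, K)

# Exact expectation over z: sum_z p(z|x) * c * d log p(z|x) / d theta.
switch_cost = 0
for i in range(K):
    c = cost_table[:, i]                # detached cost for z = i
    derivative = log_p_z_x[:, i]        # this term carries the gradient
    switch_cost = switch_cost + p_z_x[:, i] * c * derivative

switch_cost.mean().backward()
print(switch_net.weight.grad.shape)     # torch.Size([4, 5])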
Example #3
def p_z(self, x, const=False):
    # Marginal p(z): average the per-example posteriors p(z|x) over the batch.
    p_z_x = torch.exp(self.switchNet(x))
    res = p_z_x.sum(0) / p_z_x.size(0)
    if const:
        # Return a detached copy so no gradient flows through this value.
        return var2constvar(res)
    return res
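Example #3 averages exp(switchNet(x)) over the batch, so switchNet is evidently expected to return log-probabilities log p(z|x), consistent with its use as log_p_z_x in Example #2. A tiny check of that reading, with a hypothetical log-softmax switch net:

import torch

n, K, d = 8, 4, 5
net = torch.nn.Sequential(torch.nn.Linear(d, K), torch.nn.LogSoftmax(dim=1))
x = torch.randn(n, d)

p_z_x = torch.exp(net(x))            # per-example posterior p(z|x)
p_z = p_z_x.sum(0) / p_z_x.size(0)   # batch marginal, as in p_z()
print(p_z.sum())                     # ~1, since every row of p_z_x sums to 1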