Example #1
import math

import torch
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    Implementation taken from
    https://github.com/tkipf/pygcn
    """
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input1, adj):
        # Move the inputs to the weight's device instead of re-wrapping the
        # weight as a new Parameter on every call, which would silently
        # detach it from any optimizer already holding a reference to it.
        input1 = input1.to(self.weight.device)
        adj = adj.to(self.weight.device)
        support = torch.mm(input1, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
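A minimal usage sketch, under my own assumptions (a 4-node graph with 16 input features and an identity adjacency; none of this comes from the pygcn repo):

layer = GraphConvolution(in_features=16, out_features=8)
x = torch.randn(4, 16)           # 4 nodes, 16 features each
adj = torch.eye(4).to_sparse()   # sparse adjacency, as torch.spmm expects
out = layer(x, adj)              # -> [4, 8]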
Example #2
    def _sin_cos_enc(self, from_length, to_length, embedding_size):
        position_enc = np.array([[
            pos / np.power(10000, 2 * i / embedding_size)
            for i in range(embedding_size)
        ] for pos in range(from_length, to_length)],
                                dtype=np.float32)

        # put sinusoidal values on even positions
        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])
        # put cosine values on odd positions
        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])

        result = Parameter(
            torch.from_numpy(position_enc))  # Why is this a parameter?
        if next(self.embed.parameters()).is_cuda:
            result = result.cuda()
        return result
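A standalone worked example of the same encoding (my assumed sizes: positions 0..3, embedding size 4). Even columns hold sin(pos / 10000^(2i/d)) and odd columns hold cos of the same argument, so row 0 comes out as [0, 1, 0, 1]:

import numpy as np

enc = np.array([[pos / np.power(10000, 2 * i / 4) for i in range(4)]
                for pos in range(0, 4)], dtype=np.float32)
enc[:, 0::2] = np.sin(enc[:, 0::2])
enc[:, 1::2] = np.cos(enc[:, 1::2])
print(enc.shape)  # (4, 4)
print(enc[0])     # [0. 1. 0. 1.]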
Example #3
class Gumbel_Generator_nc_asy(nn.Module):
    def __init__(self, sz=10, del_num=1, temp=10, temp_drop_frac=0.9999):
        super(Gumbel_Generator_nc_asy, self).__init__()
        self.sz = sz
        self.del_num = del_num
        self.gen_matrix = Parameter(
            torch.rand(del_num * (2 * sz - del_num - 1),
                       2))  # parameterize only the unknown part of the matrix
        self.temperature = temp
        self.temp_drop_frac = temp_drop_frac

    def drop_temp(self):
        # anneal (cool down) the temperature
        self.temperature = self.temperature * self.temp_drop_frac

    def sample_all(self, hard=False):
        self.logp = self.gen_matrix
        if use_cuda:
            self.logp = self.gen_matrix.cuda()

        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(
                (self.del_num * (2 * self.sz - self.del_num - 1), 2))
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh

        out = out[:, 0]

        if use_cuda:
            out = out.cuda()

        matrix = torch.zeros(self.sz, self.sz)
        if use_cuda:
            matrix = matrix.cuda()
        left_mask = torch.ones(self.sz, self.sz)
        left_mask[:-self.del_num, :-self.del_num] = 0
        left_mask = left_mask - torch.diag(torch.diag(left_mask))
        un_index = left_mask.nonzero()
        matrix[(un_index[:, 0], un_index[:, 1])] = out
        out_matrix = matrix
        # out_matrix = out[:, 0].view(self.gen_matrix.size()[0], self.gen_matrix.size()[0])
        return out_matrix

    def init(self, mean, var):
        init.normal_(self.gen_matrix, mean=mean, std=var)
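A hedged usage sketch with sizes of my choosing; gumbel_softmax, init, and the use_cuda flag are module-level names the class assumes to be in scope:

gen = Gumbel_Generator_nc_asy(sz=10, del_num=2, temp=10)
gen.init(mean=0.0, var=0.1)       # gen_matrix ~ N(0, 0.1)
adj = gen.sample_all(hard=False)  # -> [10, 10], nonzero only in the unknown part
gen.drop_temp()                   # decay the Gumbel-softmax temperature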
Example #4
class Tree(nn.Module):
    def __init__(self,depth,n_in_feature):
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2 ** (depth - 1)

        # used features in this tree
        n_used_feature = self.n_leaf - 1
        onehot = np.eye(n_in_feature)
        using_idx = np.random.choice(np.arange(n_in_feature), n_used_feature, replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(torch.FloatTensor),requires_grad=False)


    def forward(self,x):
        """
        :param x(Variable): [batch_size,n_features]
        :return: route probability (Variable): [batch_size,n_leaf]
        """
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()
        #print(x.shape)
        feats = torch.mm(x,self.feature_mask) # ->[batch_size,n_used_feature]
        decision = torch.sigmoid(feats) # ->[batch_size,n_leaf - 1]

        decision = torch.unsqueeze(decision,dim=2)
        decision_comp = 1-decision
        decision = torch.cat((decision,decision_comp),dim=2) # -> [batch_size,n_leaf,2]

        # compute route probability
        batch_size = x.size()[0]
        _mu = Variable(x.data.new(batch_size,1,1).fill_(1.))
        begin_idx = 0
        end_idx = 1
        for n_layer in range(0, self.depth - 1):
            _mu = _mu.view(batch_size,-1,1).repeat(1,1,2)
            _decision = decision[:, begin_idx:end_idx, :]  # -> [batch_size,2**n_layer,2]
            _mu = _mu*_decision # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (n_layer+1)

        mu = _mu.view(batch_size,self.n_leaf)
        #print(mu[:, :5])
        return mu
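A small usage sketch under my own assumptions (the class relies on numpy, torch, nn, Parameter, and Variable being imported as in its source repo). Each output row sums to 1 because every internal node splits its probability mass between its two children:

tree = Tree(depth=3, n_in_feature=10)   # n_leaf = 4, using 3 of 10 features
x = torch.randn(5, 10)
mu = tree(x)                            # -> [5, 4] leaf routing probabilities
print(mu.sum(dim=1))                    # ~ tensor([1., 1., 1., 1., 1.])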
Example #5
class LinearLayer(nn.Module):

    def __init__(self, in_features, out_features, initializer=nn.init.xavier_uniform_):
        super(LinearLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(initializer(
            torch.Tensor(in_features, out_features)))

    def forward(self, input):
        # no bias
        if config.learning.cuda:
            return torch.mm(input.cuda(), self.weight.cuda())
        else:
            return torch.mm(input, self.weight)


    def __repr__(self):
        return self.__class__.__name__ + ' (' \
            + str(self.in_features) + ' -> ' \
            + str(self.out_features) + ')'
Example #6
class Tree(nn.Module):
    def __init__(self, depth, feature_length, vector_length, use_cuda = False):
        """
        Args:
            depth (int): depth of the neural decision tree.
            feature_length (int): number of neurons in the last feature layer
            vector_length (int): length of the mean vector stored at each tree leaf node
        """
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2 ** depth
        self.feature_length = feature_length
        self.vector_length = vector_length
        self.is_cuda = use_cuda

        onehot = np.eye(feature_length)
        # randomly use some neurons in the feature layer to compute decision function
        using_idx = np.random.choice(feature_length, self.n_leaf, replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(torch.FloatTensor),requires_grad=False)
        # a leaf node contains a mean vector and a covariance matrix
        self.mean = np.ones((self.n_leaf, self.vector_length))
        # TODO: use k-means clustering to perform leaf node initialization
        self.mu_cache = []
        # use sigmoid function as the decision function
        self.decision = nn.Sequential(OrderedDict([
                        ('sigmoid', nn.Sigmoid()),
                        ]))
        # used for leaf node update
        self.covmat = np.array([np.eye(self.vector_length) for i in range(self.n_leaf)])
        # also stores the inverse of the covariant matrix for efficiency
        self.covmat_inv = np.array([np.eye(self.vector_length) for i in range(self.n_leaf)])
        # also stores the determinant of the covariant matrix for efficiency
        self.factor = np.ones((self.n_leaf))       
        if not use_cuda:
            # the CPU path is not implemented; the parameters below live on the GPU
            raise NotImplementedError
#            self.mean = Parameter(torch.from_numpy(self.mean).type(torch.FloatTensor), requires_grad=False)
#            self.covmat = Parameter(torch.from_numpy(self.covmat).type(torch.FloatTensor), requires_grad=False)
#            self.covmat_inv = Parameter(torch.from_numpy(self.covmat_inv).type(torch.FloatTensor), requires_grad=False)
#            self.factor = Parameter(torch.from_numpy(self.factor).type(torch.FloatTensor), requires_grad=False)
        else:
            self.mean = Parameter(torch.from_numpy(self.mean).type(torch.FloatTensor).cuda(), requires_grad=False)
            self.covmat = Parameter(torch.from_numpy(self.covmat).type(torch.FloatTensor).cuda(), requires_grad=False)
            self.covmat_inv = Parameter(torch.from_numpy(self.covmat_inv).type(torch.FloatTensor).cuda(), requires_grad=False)
            self.factor = Parameter(torch.from_numpy(self.factor).type(torch.FloatTensor).cuda(), requires_grad=False)            


    def forward(self, x, save_flag = False):
        """
        Args:
            param x (Tensor): input feature batch of size [batch_size,n_features]
        Return:
            (Tensor): routing probability of size [batch_size,n_leaf]
        """ 
        cache = {} # save some intermediate results for analysis
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        feats = torch.mm(x, self.feature_mask) # ->[batch_size,n_leaf]
        decision = self.decision(feats) # passed sigmoid->[batch_size,n_leaf]

        decision = torch.unsqueeze(decision,dim=2) # ->[batch_size,n_leaf,1]
        decision_comp = 1-decision
        decision = torch.cat((decision,decision_comp),dim=2) # -> [batch_size,n_leaf,2]
        # compute route probability
        # note: we do not use decision[:,0]
        if save_flag:
            cache['decision'] = decision[:,:,0]           
        batch_size = x.size()[0]
        
        mu = x.data.new(batch_size,1,1).fill_(1.)
        begin_idx = 1
        end_idx = 2
        for n_layer in range(0, self.depth):
            # mu stores the probability a sample is routed at certain node
            # repeat it to be multiplied for left and right routing
            mu = mu.repeat(1, 1, 2)
            # the routing probability at n_layer
            _decision = decision[:, begin_idx:end_idx, :] # -> [batch_size,2**n_layer,2]
            mu = mu*_decision # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (n_layer+1)
            # merge left and right nodes to the same layer
            mu = mu.view(batch_size, -1, 1)

        mu = mu.view(batch_size, -1)
        if save_flag:
            cache['mu'] = mu
            return mu, cache
        else:
            return mu

    def pred(self, x):
        """
        Predict a vector based on stored vectors and routing probability
        Args:
            param x (Tensor): input feature batch of size [batch_size, feature_length]
        Return: 
            (Tensor): prediction [batch_size,vector_length]
        """
        p = torch.mm(self(x), self.mean)
        return p
    
    def update_label_distribution(self, target_batch):
        """
        compute the new mean vector and covariance matrix based on a multivariate Gaussian distribution
        Args:
            param target_batch (Tensor): target batch of size [batch_size, vector_length]
        """
        target_batch = torch.cat(target_batch, dim = 0)
        mu = torch.cat(self.mu_cache, dim = 0)
        batch_size = len(mu)
        # no need for gradient computation
        with torch.no_grad():
            leaf_prob_density = mu.data.new(batch_size, self.n_leaf)
            for leaf_idx in range(self.n_leaf):
            # vectorized code is used for efficiency
                temp = target_batch - self.mean[leaf_idx, :]
                leaf_prob_density[:, leaf_idx] = (self.factor[leaf_idx]*torch.exp(-0.5*(torch.mm(temp, self.covmat_inv[leaf_idx, :,:])*temp).sum(dim = 1))).clamp(FLT_MIN, FLT_MAX) # Tensor [batch_size, 1]
            numerator = (mu * leaf_prob_density).clamp(FLT_MIN, FLT_MAX) # [batch_size, n_leaf]
            denominator = (numerator.sum(dim = 1).unsqueeze(1)).clamp(FLT_MIN, FLT_MAX) # add dimension for broadcasting
            zeta = numerator/denominator # [batch_size, n_leaf]
            # new_mean is a weighted sum of all training samples
            new_mean = (torch.mm(target_batch.transpose(0, 1), zeta)/(zeta.sum(dim = 0).unsqueeze(0))).transpose(0, 1) # [n_leaf, vector_length]
            # allocate for new parameters
            new_covmat = new_mean.data.new(self.n_leaf, self.vector_length, self.vector_length)
            new_covmat_inv = new_mean.data.new(self.n_leaf, self.vector_length, self.vector_length)
            new_factor = new_mean.data.new(self.n_leaf)
            for leaf_idx in range(self.n_leaf):
                # new covariance matrix is a weighted sum of all covmats of each training sample
                weights = zeta[:, leaf_idx].unsqueeze(0)
                temp = target_batch - new_mean[leaf_idx, :]
                new_covmat[leaf_idx, :,:] = torch.mm(weights*(temp.transpose(0, 1)), temp)/(weights.sum())
                # warn before inverting so a singular matrix is reported first
                if new_covmat[leaf_idx, :,:].det() <= 0:
                    print('Warning: singular matrix %d'%leaf_idx)
                # update cache (inverse and factor) for future use
                new_covmat_inv[leaf_idx, :,:] = new_covmat[leaf_idx, :,:].inverse()
                new_factor[leaf_idx] = 1.0/max((torch.sqrt(new_covmat[leaf_idx, :,:].det())), FLT_MIN)
        # update parameters
        self.mean = Parameter(new_mean, requires_grad = False)
        self.covmat = Parameter(new_covmat, requires_grad = False) 
        self.covmat_inv = Parameter(new_covmat_inv, requires_grad = False)
        self.factor = Parameter(new_factor, requires_grad = False) 
        return

    def update_label_distribution_simple(self, target_batch):
        """
        compute the new mean vector with a simple update rule inspired by traditional regression trees
        Args:
            param target_batch (Tensor): target batch of size [batch_size, vector_length]
        """
#        if self.is_cuda:
#            # move tensors to GPU
#            target_batch = target_batch.cuda()       
        target_batch = torch.cat(target_batch, dim = 0)
        mu = torch.cat(self.mu_cache, dim = 0)
#        if self.is_cuda:
#            # move tensors to GPU
#            mu = mu.cuda()
#            target_batch = target_batch.cuda()          
        with torch.no_grad():
            # compute routing leaf probability for this batch
            #mu = self(feat_batch) + FLT_MIN # [batch_size, n_leaf]
            # new_mean is a weighted sum of all training samples
            new_mean = (torch.mm(target_batch.transpose(0, 1), mu)/(mu.sum(dim = 0).unsqueeze(0))).transpose(0, 1) # [n_leaf, vector_length]
        # update parameters
        self.mean = Parameter(new_mean, requires_grad = False)
        return
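A hedged interaction sketch for the class above (sizes are mine; FLT_MIN and FLT_MAX are module-level constants the source repo defines, and __init__ requires a CUDA device):

tree = Tree(depth=2, feature_length=16, vector_length=3, use_cuda=True)
x = torch.randn(8, 16).cuda()
tree.mu_cache.append(tree(x))            # cache routing probabilities [8, 4]
targets = [torch.randn(8, 3).cuda()]
tree.update_label_distribution(targets)  # EM-style refit of the leaf Gaussians
pred = tree.pred(x)                      # -> [8, 3]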
Example #7
class Tree(nn.Module):
    def __init__(self,
                 depth,
                 n_in_feature,
                 used_feature_rate,
                 n_class,
                 jointly_training=True):
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2**depth
        self.n_class = n_class
        self.jointly_training = jointly_training

        # used features in this tree
        n_used_feature = int(n_in_feature * used_feature_rate)
        onehot = np.eye(n_in_feature)
        using_idx = np.random.choice(np.arange(n_in_feature),
                                     n_used_feature,
                                     replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor).cuda(),
                                      requires_grad=False)
        # leaf label distribution
        if jointly_training:
            self.pi = np.random.rand(self.n_leaf, n_class)
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=True)
        else:
            self.pi = np.ones((self.n_leaf, n_class)) / n_class
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=False)

        # decision
        self.decision = nn.Sequential(
            OrderedDict([
                ('linear1', nn.Linear(n_used_feature, self.n_leaf)),
                ('sigmoid', nn.Sigmoid()),
            ]))

    def forward(self, x):
        """
        :param x(Variable): [batch_size,n_features]
        :return: route probability (Variable): [batch_size,n_leaf]
        """
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        feats = torch.mm(x, self.feature_mask)  # ->[batch_size,n_used_feature]
        decision = self.decision(feats)  # ->[batch_size,n_leaf]

        decision = torch.unsqueeze(decision, dim=2)
        decision_comp = 1 - decision
        decision = torch.cat((decision, decision_comp),
                             dim=2)  # -> [batch_size,n_leaf,2]

        # compute route probability
        # note: we do not use decision[:,0]
        batch_size = x.size()[0]
        _mu = Variable(x.data.new(batch_size, 1, 1).fill_(1.))
        begin_idx = 1
        end_idx = 2
        for n_layer in range(0, self.depth):
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
            _decision = decision[:, begin_idx:
                                 end_idx, :]  # -> [batch_size,2**n_layer,2]
            _mu = _mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2**(n_layer + 1)

        mu = _mu.view(batch_size, self.n_leaf)

        return mu

    def get_pi(self):
        if self.jointly_training:
            return F.softmax(self.pi, dim=-1)
        else:
            return self.pi

    def cal_prob(self, mu, pi):
        """

        :param mu [batch_size,n_leaf]
        :param pi [n_leaf,n_class]
        :return: label probability [batch_size,n_class]
        """
        p = torch.mm(mu, pi)

        return p

    def update_pi(self, new_pi):
        self.pi.data = new_pi
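A hedged usage sketch (sizes are mine; __init__ already places feature_mask on the GPU, and .cuda() moves pi and the decision layer there too):

tree = Tree(depth=3, n_in_feature=64, used_feature_rate=0.5,
            n_class=10, jointly_training=True).cuda()
x = torch.randn(32, 64).cuda()
mu = tree(x)                          # [32, 8] leaf routing probabilities
p = tree.cal_prob(mu, tree.get_pi())  # [32, 10] class probabilities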
Example #8
class Tree(nn.Module):
    def __init__(self, depth, feature_length, vector_length, use_cuda=False):
        """
        Args:
            depth (int): depth of the neural decision tree.
            feature_length (int): number of neurons in the last feature layer
            vector_length (int): length of the mean vector stored at each tree leaf node
        """
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2**depth
        self.feature_length = feature_length
        self.vector_length = vector_length
        self.is_cuda = use_cuda
        # used in leaf node update
        self.mu_cache = []

        onehot = np.eye(feature_length)
        # randomly use some neurons in the feature layer to compute decision function
        self.using_idx = np.random.choice(feature_length,
                                          self.n_leaf,
                                          replace=False)
        self.feature_mask = onehot[self.using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor),
                                      requires_grad=False)
        # a leaf node contains a mean vector and a covariance matrix
        self.pi = np.zeros((self.n_leaf, self.vector_length))
        if not use_cuda:
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=False)
        else:
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor).cuda(),
                                requires_grad=False)
        # use sigmoid function as the decision function
        self.decision = nn.Sequential(
            OrderedDict([
                ('sigmoid', nn.Sigmoid()),
            ]))

    def forward(self, x, save_flag=False):
        """
        Args:
            param x (Tensor): input feature batch of size [batch_size,n_features]
        Return:
            (Tensor): routing probability of size [batch_size,n_leaf]
        """
        #        def debug_hook(grad):
        #            print('This is a debug hook')
        #            print(grad.shape)
        #            print(grad)
        cache = {}  # save some intermediate results for analysis
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        feats = torch.mm(x, self.feature_mask)  # ->[batch_size,n_leaf]
        decision = self.decision(feats)  # passed sigmoid->[batch_size,n_leaf]

        decision = torch.unsqueeze(decision, dim=2)  # ->[batch_size,n_leaf,1]
        decision_comp = 1 - decision
        decision = torch.cat((decision, decision_comp),
                             dim=2)  # -> [batch_size,n_leaf,2]
        # for debug
        #decision.register_hook(debug_hook)
        # compute route probability
        # note: we do not use decision[:,0]
        # save some intermediate results for analysis
        if save_flag:
            cache['decision'] = decision[:, :, 0]
        batch_size = x.size()[0]

        mu = x.data.new(batch_size, 1, 1).fill_(1.)
        begin_idx = 1
        end_idx = 2
        for n_layer in range(0, self.depth):
            # mu stores the probability a sample is routed at certain node
            # repeat it to be multiplied for left and right routing
            mu = mu.repeat(1, 1, 2)
            # the routing probability at n_layer
            _decision = decision[:, begin_idx:
                                 end_idx, :]  # -> [batch_size,2**n_layer,2]
            mu = mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2**(n_layer + 1)
            # merge left and right nodes to the same layer
            mu = mu.view(batch_size, -1, 1)

        mu = mu.view(batch_size, -1)
        if save_flag:
            return mu, cache
        else:
            return mu

    def pred(self, x):
        """
        Predict a vector based on stored vectors and routing probability
        Args:
            param x (Tensor): input feature batch of size [batch_size, feature_length]
        Return: 
            (Tensor): prediction [batch_size,vector_length]
        """
        p = torch.mm(self(x), self.pi)
        return p

    def get_pi(self):
        return self.pi

    def cal_prob(self, mu, pi):
        """

        :param mu [batch_size,n_leaf]
        :param pi [n_leaf,n_class]
        :return: label probability [batch_size,n_class]
        """
        p = torch.mm(mu, pi)
        return p

    def update_label_distribution(self, target_batches):
        """
        compute the new pi with a simple update rule inspired by traditional regression trees
        Args:
            param target_batches (list of Tensor): target batches, each of size [batch_size, vector_length]
        """
        with torch.no_grad():
            new_pi = self.pi.data.new(self.n_leaf, self.vector_length).fill_(
                0.)  # Tensor [n_leaf,n_class]

            for mu, target in zip(self.mu_cache, target_batches):
                prob = torch.mm(mu, self.pi)  # [batch_size,n_class]

                _target = target.unsqueeze(1)  # [batch_size,1,n_class]
                _pi = self.pi.unsqueeze(0)  # [1,n_leaf,n_class]
                _mu = mu.unsqueeze(2)  # [batch_size,n_leaf,1]
                _prob = torch.clamp(prob.unsqueeze(1), min=1e-6,
                                    max=1.)  # [batch_size,1,n_class]

                _new_pi = torch.mul(torch.mul(_target, _pi),
                                    _mu) / _prob  # [batch_size,n_leaf,n_class]
                new_pi += torch.sum(_new_pi, dim=0)
        new_pi = F.softmax(new_pi, dim=1).data
        self.pi = Parameter(new_pi, requires_grad=False)
        return
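A hedged sketch of the update above (sizes are mine; use_cuda=False keeps everything on the CPU). Since pi is initialized to zeros in this example, the very first update lands on a uniform leaf distribution after the softmax:

tree = Tree(depth=2, feature_length=16, vector_length=3, use_cuda=False)
x = torch.randn(8, 16)
tree.mu_cache = [tree(x)]                            # cache routing probabilities
targets = [torch.eye(3)[torch.randint(0, 3, (8,))]]  # one-hot targets [8, 3]
tree.update_label_distribution(targets)              # refit leaf distributions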
Example #9
class Tree(nn.Module):
    def __init__(self, depth, n_in_feature, used_feature_rate):
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2**depth

        n_used_feature = int(n_in_feature * used_feature_rate)
        onehot = np.eye(n_in_feature)
        np.random.seed(0)
        using_idx = np.random.choice(np.arange(n_in_feature),
                                     n_used_feature,
                                     replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor),
                                      requires_grad=False)

        self.pi = np.ones((self.n_leaf, 2)) / 2
        self.pi = Parameter(torch.from_numpy(self.pi).type(torch.FloatTensor),
                            requires_grad=False)

        self.decision = nn.Sequential(
            OrderedDict([
                ('linear1',
                 nn.Linear(in_features=n_used_feature,
                           out_features=self.n_leaf)),
                ('sigmoid', nn.Sigmoid()),
            ]))

    def forward(self, x):
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        feats = torch.mm(x, self.feature_mask)
        decision = self.decision(feats)

        decision = torch.unsqueeze(decision, dim=2)
        decision_comp = 1 - decision
        decision = torch.cat((decision, decision_comp), dim=2)

        batch_size = x.size()[0]
        _mu = Variable(x.data.new(batch_size, 1, 1).fill_(1.))
        begin_idx = 1
        end_idx = 2
        for n_layer in range(self.depth):
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
            _decision = decision[:, begin_idx:end_idx, :]
            _mu = _mu * _decision
            begin_idx = end_idx
            end_idx = begin_idx + 2**(n_layer + 1)

        mu = _mu.view(batch_size, self.n_leaf)

        return mu

    def get_pi(self):

        return self.pi

    def cal_prob(self, mu, pi):
        p = torch.mm(mu, pi)
        return p

    def update_pi(self, new_pi):
        self.pi.data = new_pi
Example #10
class Tree(nn.Module):
    def __init__(self,
                 depth,
                 n_in_feature,
                 used_feature_rate,
                 n_class,
                 jointly_training=True):
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2**depth
        self.n_class = n_class
        self.jointly_training = jointly_training

        # used features in this tree
        n_used_feature = int(n_in_feature * used_feature_rate)
        onehot = np.eye(n_in_feature)
        using_idx = np.random.choice(np.arange(n_in_feature),
                                     n_used_feature,
                                     replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor),
                                      requires_grad=False)

        # initialize leaf label distribution pi = [n_leaf, n_class]
        if jointly_training:  # random distributed between (0, 1)
            self.pi = np.random.rand(self.n_leaf, n_class)
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=True)
        else:  # equally distributed => 1/n_class
            self.pi = np.ones((self.n_leaf, n_class)) / n_class
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=False)

        # split decision
        self.decision = nn.Sequential(
            OrderedDict([
                ('linear1', nn.Linear(n_used_feature, self.n_leaf)),
                ('sigmoid', nn.Sigmoid()),
            ]))

    def forward(self, x):
        """
		:param x(Variable): [batch_size,n_features]
		:return: route probability (Variable): [batch_size,n_leaf]
		"""
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        # randomly select subset of features
        feats = torch.mm(x, self.feature_mask)  # ->[batch_size,n_used_feature]

        # linear + sigmoid turns the features into one decision per leaf
        decision = self.decision(
            feats
        )  # ->[batch_size,n_leaf], e.g. [1000, 1024]
        #print('First convert features into leaf size: ', decision.size())
        decision = torch.unsqueeze(decision, dim=2)  # add one dim
        #print('Unsqueezed decision: ', decision.size())  # [1000, 1024, 1]
        decision_comp = 1 - decision
        decision = torch.cat((decision, decision_comp),
                             dim=2)  # -> [batch_size,n_leaf,2]
        #print('Concatenated decision: ', decision.size())  # [1000, 1024, 2]

        # compute route probability
        # note: we do not use decision[:,0]
        batch_size = x.size()[0]
        _mu = Variable(x.data.new(batch_size, 1, 1).fill_(1.))
        begin_idx = 1
        end_idx = 2

        for n_layer in range(0, self.depth):
            # view: reshape into [batch, -1, 1] -- one mu per sample per node
            # repeat: duplicate each mu so it matches decision's last dim of 2
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
            _decision = decision[:, begin_idx:
                                 end_idx, :]  # -> [batch_size,2**n_layer,2]
            _mu = _mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2**(n_layer + 1)
        mu = _mu.view(batch_size, self.n_leaf)  # [batch_size, n_leaf]

        return mu

    def get_pi(self):
        if self.jointly_training:
            return F.softmax(self.pi, dim=-1)  # label distribution
        else:
            return self.pi

    def cal_prob(self, mu, pi):
        """
		:param mu [batch_size,n_leaf]
		:param pi [n_leaf,n_class]
		:return: label probability [batch_size,n_class]
		"""
        p = torch.mm(mu, pi)  # tree prob P_T
        return p

    def update_pi(self, new_pi):
        self.pi.data = new_pi
Example #11
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes, grayscale):
        self.num_classes = num_classes
        self.inplanes = 64
        feature_length = 224
        onehot = np.eye(feature_length)
        # randomly use some neurons in the feature layer to compute decision function
        # a leaf node contains a mean vector and a covariance matrix

        if grayscale:
            in_dim = 1
        else:
            in_dim = 3
        super(ResNet, self).__init__()
        using_idx = np.random.choice(feature_length, 50, replace=False)
        self.feature_mask = onehot[using_idx].T
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor),
                                      requires_grad=False)

        self.conv1 = nn.Conv2d(in_dim,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1, padding=2)
        self.fc = nn.Linear(2048, 1, bias=False)
        self.linear_1_bias = nn.Parameter(torch.zeros(1).float())

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, (2. / n)**.5)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def pred(self, x):
        #p = torch.mm(self(x), self.mean)
        return self(x)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes,
                          planes * block.expansion,
                          kernel_size=1,
                          stride=stride,
                          bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):

        if not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()
        # NOTE: the buffer shape is hard-coded for batches of 30 RGB images;
        # each 224x224 channel is projected through the feature mask onto the
        # 50 randomly selected rows and columns (224 -> 50 on both dims)
        xt = torch.ones((30, 3, 50, 50))
        for i, xi in enumerate(x):
            for j, xj in enumerate(xi):
                xt[i][j] = torch.mm(
                    torch.mm(xj, self.feature_mask).T, self.feature_mask)
        x = xt.cuda()
        #  x = torch.mm(x, self.feature_mask)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        #   x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        logits = logits + self.linear_1_bias
        probas = torch.sigmoid(logits)
        # print(probas.shape)
        # print(logits.shape)
        return probas
Example #12
class Tree(nn.Module):
    def __init__(self, depth, feature_length, vector_length, use_cuda=False):
        """
            depth (int): depth of the neural decision tree.
            feature_length (int): number of neurons in the last feature layer
            vector_length (int): length of the mean vector stored at each tree leaf node
            #GG I think vector length is actually the number of classes
        """
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2**depth
        self.feature_length = feature_length
        self.vector_length = vector_length
        self.is_cuda = use_cuda
        # used in leaf node update
        self.mu_cache = []
        #GG
        # self.nu_cache = []

        onehot = np.eye(feature_length)
        #GG^ why onehot? returns a feature_length x feature_length identity matrix
        # randomly use some neurons in the feature layer to compute decision function
        self.using_idx = np.random.choice(feature_length,
                                          self.n_leaf,
                                          replace=False)
        #GG^ create a random vector of length n_leaf composed of numbers out of feature_length
        #GG actually choosing the features for each leaf
        self.feature_mask = onehot[self.using_idx].T
        #GG^ each column of feature_mask has a single 1 at the chosen feature's row
        self.feature_mask = Parameter(torch.from_numpy(self.feature_mask).type(
            torch.FloatTensor),
                                      requires_grad=False)
        # a leaf node contains a mean vector and a covariance matrix
        self.pi = np.ones(
            (self.n_leaf, self.vector_length)) / self.vector_length
        #GG^ pi is a matrix with n_leaf rows and vector_length columns; each cell
        #GG gives the probability that a certain leaf refers to a class
        if not use_cuda:
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor),
                                requires_grad=False)
        else:
            self.pi = Parameter(torch.from_numpy(self.pi).type(
                torch.FloatTensor).cuda(),
                                requires_grad=False)
        # use sigmoid function as the decision function
        self.decision = nn.Sequential(
            OrderedDict([
                ('sigmoid', nn.Sigmoid()),
            ]))

    def forward(self, x, save_flag=False, wavelet=None):
        """
        Args:
            param x (Tensor): input feature batch of size [batch_size,n_features]
        Return:
            (Tensor): routing probability of size [batch_size,n_leaf]
            #GG basically, returns mu
        """
        #    def debug_hook(grad):
        #        print('This is a debug hook')
        #        print(grad.shape)
        #        print(grad)
        cache = {}  # save some intermediate results for analysis
        if x.is_cuda and not self.feature_mask.is_cuda:
            self.feature_mask = self.feature_mask.cuda()

        feats = torch.mm(x, self.feature_mask)  # ->[batch_size,n_leaf]
        #GG^ x[batch size, feature_length] mm with feature_mask[feature_length,n_leaf]
        decision = self.decision(feats)  # passed sigmoid->[batch_size,n_leaf]

        decision = torch.unsqueeze(decision, dim=2)  # ->[batch_size,n_leaf,1]
        decision_comp = 1 - decision
        decision = torch.cat((decision, decision_comp),
                             dim=2)  # -> [batch_size,n_leaf,2]
        # for debug
        #decision.register_hook(debug_hook)
        # compute route probability
        # note: we do not use decision[:,0]
        # save some intermediate results for analysis
        if save_flag:
            cache['decision'] = decision[:, :, 0]
        batch_size = x.size()[0]

        mu = x.data.new(batch_size, 1, 1).fill_(1.)
        #GG^ .new creates a tensor with the same dtype and device as x.data
        #GG .fill_(1.) fills the tensor with 1.s
        begin_idx = 1
        end_idx = 2
        for n_layer in range(0, self.depth):
            # mu stores the probability a sample is routed at certain node
            # repeat it to be multiplied for left and right routing
            mu = mu.repeat(1, 1, 2)
            # the routing probability at n_layer
            _decision = decision[:, begin_idx:
                                 end_idx, :]  # -> [batch_size,2**n_layer,2]
            #GG^ the decision tensor is [batch_size, n_leaf, decision & complement]
            mu = mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2**(n_layer + 1)
            # merge left and right nodes to the same layer
            mu = mu.view(batch_size, -1, 1)
            #GG print(f'begin_idx: {begin_idx}, end_idx {end_idx}, delta {-begin_idx+end_idx}')
        mu = mu.view(batch_size, -1)

        if save_flag:
            return mu, cache
        else:
            return mu

    def pred(self, x):
        """
        Predict a vector based on stored vectors and routing probability
        Args:
            param x (Tensor): input feature batch of size [batch_size, feature_length]
        Return: 
            (Tensor): prediction [batch_size,vector_length]
        """
        p = torch.mm(self(x), self.pi)
        return p

    def get_pi(self):
        return self.pi

    def cal_prob(self, mu, pi):
        """

        :param mu [batch_size,n_leaf]
        :param pi [n_leaf,n_class]
        :return: label probability [batch_size,n_class]
        """
        p = torch.mm(mu, pi)
        return p

    def update_label_distribution(self, target_batches):
        """
        compute the new pi with a simple update rule inspired by traditional regression trees
        Args:
            param target_batches (list of Tensor): target batches, each of size [batch_size, vector_length]
        """
        with torch.no_grad():
            new_pi = self.pi.data.new(self.n_leaf, self.vector_length).fill_(
                0.)  #GG could also init with 1/self.vector_length  # Tensor [n_leaf,n_class]

            for mu, target in zip(self.mu_cache, target_batches):
                prob = torch.mm(mu, self.pi)  # [batch_size,n_class]
                _target = target.unsqueeze(1)  # [batch_size,1,n_class]
                _pi = self.pi.unsqueeze(0)  # [1,n_leaf,n_class]
                _mu = mu.unsqueeze(2)  # [batch_size,n_leaf,1]
                _prob = torch.clamp(prob.unsqueeze(1), min=1e-6,
                                    max=1.)  # [batch_size,1,n_class]

                _new_pi = torch.mul(torch.mul(_target, _pi),
                                    _mu) / _prob  # [batch_size,n_leaf,n_class]
                new_pi += torch.sum(_new_pi, dim=0)
        # test
        #import numpy as np
        #if np.any(np.isnan(new_pi.cpu().numpy())):
        #    print(new_pi)
        # test
        new_pi = F.softmax(new_pi, dim=1).data  #GG??

        self.pi = Parameter(new_pi, requires_grad=False)
        return